Beispiel #1
0
  /**
   * Inserts an import directive for {@code packageName} at the front of a class definition's
   * statement list.
   *
   * <p>The name is split on {@code '.'} and each segment is folded, left to right, into a package
   * identifier chain. The resulting directive takes its {@code pkgdef} from {@code node}. If
   * {@code node.statements} is {@code null}, a new statement list is created first.
   *
   * @param context context used to build the node factory and the import directive
   * @param node class definition that receives the import
   * @param packageName dot-separated name to import
   */
  public static void addImport(Context context, ClassDefinitionNode node, String packageName) {
    NodeFactory factory = new NodeFactory(context);

    // Fold every dot-separated segment into the identifier chain.
    PackageIdentifiersNode segments = null;
    for (StringTokenizer parts = new StringTokenizer(packageName, "."); parts.hasMoreTokens(); ) {
      IdentifierNode segment = factory.identifier(parts.nextToken());
      segments = factory.packageIdentifiers(segments, segment, true);
    }

    PackageNameNode importedPackage = factory.packageName(segments);
    ImportDirectiveNode directive = factory.importDirective(null, importedPackage, null, context);
    directive.pkgdef = node.pkgdef;

    if (node.statements == null) {
      node.statements = new StatementListNode(null);
    }

    // Index 0 keeps the import ahead of every existing statement.
    node.statements.items.add(0, directive);
  }
 /**
  * Copies every entry of {@code node.getInfos()} into {@code data} through
  * {@link AbstractGetInfo#setInfo}. Does nothing when the info map is {@code null}.
  *
  * @param data receiver of the copied entries
  * @param node source of the info map
  */
 private static void loadObject(final AbstractGetInfo<?> data, final NodeFactory node) {
   if (node.getInfos() == null) {
     return;
   }

   // Work on a snapshot of the keys rather than on the live key set.
   final String[] snapshot =
       node.getInfos().keySet().toArray(new String[node.getInfos().keySet().size()]);
   for (int i = 0; i < snapshot.length; i++) {
     final String key = snapshot[i];
     // NOTE(review): EMPTY is defined elsewhere; presumably the empty string - confirm.
     data.setInfo(key + EMPTY, node.getInfos().get(key));
   }
 }
  /*
   * Generate the states array initializer (AST).
   *
   * For every state name that resolves to a non-null State, the state's definition body is
   * appended to an argument list. That list becomes an array literal which is assigned to the
   * STATES identifier, and the assignment is appended to the incoming statement list.
   * When the model has no state names the incoming statement list is returned untouched.
   */
  private StatementListNode genStatesAST(
      StatesModel model,
      NodeFactory nodeFactory,
      HashSet<String> configNamespaces,
      boolean generateDocComments,
      StatementListNode statementList) {
    Set<String> stateNames = model.info.getStateNames();
    if (stateNames.isEmpty()) {
      return statementList;
    }

    // Collect one definition expression per resolvable state.
    ArgumentListNode stateDefinitions = null;
    for (String stateName : stateNames) {
      State state = (State) model.stateByName(stateName);
      if (state == null) {
        continue;
      }
      MemberExpressionNode definition =
          state.generateDefinitionBody(
              nodeFactory, configNamespaces, generateDocComments, bindingsQueue);
      stateDefinitions = nodeFactory.argumentList(stateDefinitions, definition);
    }

    // STATES = [ ...definitions... ]
    LiteralArrayNode statesArray = nodeFactory.literalArray(stateDefinitions);
    ArgumentListNode assignedValue = nodeFactory.argumentList(null, statesArray);
    IdentifierNode statesId = nodeFactory.identifier(STATES, false);
    SetExpressionNode assignment = nodeFactory.setExpression(statesId, assignedValue, false);
    MemberExpressionNode assignmentExpr = nodeFactory.memberExpression(null, assignment);
    ListNode exprList = nodeFactory.list(null, assignmentExpr);
    ExpressionStatementNode statement = nodeFactory.expressionStatement(exprList);
    return nodeFactory.statementList(statementList, statement);
  }
  /**
   * Reads one trie node, and recursively its daughters, from {@code reader}.
   *
   * <p>The reader is consumed in this order: the node's count, then zero or more
   * (symbol, daughter node) pairs, ended by a symbol equal to {@code -1}. The number of daughters
   * found selects which {@code NodeFactory} creation method is used (0, 1, 2, 3, or the array form
   * for 4 and more).
   *
   * <p>A node deeper than {@code maxDepth} is consumed with {@code skipNode} and reported as
   * {@code null}; such {@code null} daughters are passed on to the factory methods as they are.
   *
   * @param reader source of counts and symbols
   * @param depth depth of the node about to be read
   * @param maxDepth greatest depth for which nodes are built
   * @return the node read, or {@code null} when {@code depth > maxDepth}
   * @throws IOException declared by this method; presumably raised by the reader - confirm
   */
  private static Node readNode(TrieReader reader, int depth, int maxDepth) throws IOException {

    // Too deep: consume the node's data so the reader stays aligned, but build nothing.
    if (depth > maxDepth) {
      skipNode(reader);
      return null;
    }

    long count = reader.readCount();

    int depthPlus1 = depth + 1;

    long sym1 = reader.readSymbol();

    // no daughters: the terminator was read immediately
    if (sym1 == -1L) return NodeFactory.createNode(count);

    // at least one daughter
    Node node1 = readNode(reader, depthPlus1, maxDepth);
    long sym2 = reader.readSymbol();
    // exactly one daughter
    if (sym2 == -1L) return NodeFactory.createNodeFold((char) sym1, node1, count);

    Node node2 = readNode(reader, depthPlus1, maxDepth);
    long sym3 = reader.readSymbol();
    // exactly two daughters
    if (sym3 == -1L) return NodeFactory.createNode((char) sym1, node1, (char) sym2, node2, count);

    Node node3 = readNode(reader, depthPlus1, maxDepth);
    long sym4 = reader.readSymbol();
    // exactly three daughters
    if (sym4 == -1L)
      return NodeFactory.createNode(
          (char) sym1, node1, (char) sym2, node2, (char) sym3, node3, count);
    Node node4 = readNode(reader, depthPlus1, maxDepth);

    // four or more daughters: gather symbols and nodes in parallel, in read order
    StringBuilder cBuf = new StringBuilder();
    cBuf.append((char) sym1);
    cBuf.append((char) sym2);
    cBuf.append((char) sym3);
    cBuf.append((char) sym4);

    List<Node> nodeList = new ArrayList<Node>();
    nodeList.add(node1);
    nodeList.add(node2);
    nodeList.add(node3);
    nodeList.add(node4);

    long sym;

    // Keep reading (symbol, daughter) pairs until the -1 terminator.
    while ((sym = reader.readSymbol()) != -1L) {
      cBuf.append((char) sym);
      nodeList.add(readNode(reader, depthPlus1, maxDepth));
    }
    Node[] nodes = nodeList.toArray(EMPTY_NODE_ARRAY);
    char[] cs = Strings.toCharArray(cBuf);
    return NodeFactory.createNode(cs, nodes, count); // array form, used for 4 or more daughters
  }
Beispiel #5
0
  /**
   * Appends to {@code result} the statements obtained by substituting one component of {@code st}
   * with each of its replacements. Nothing is returned; {@code result} is the output.
   *
   * <p>Replacements come from a recursive {@code replaceMult} call when {@code toReplace} is itself
   * a statement, otherwise from {@code o2n} (a single value or a collection of values). When there
   * is nothing to replace, {@code st} itself is appended.
   *
   * @param position component to substitute: 0 = subject, 1 = predicate, 2 = object. For any other
   *     value no statement is created and {@code null} is appended for each replacement.
   */
  protected static void replaceMultSPO(
      Statement st, NodeFactory f, Map o2n, Collection result, RDFNode toReplace, int position)
      throws ModelException {

    Collection replacements;

    if (toReplace instanceof Statement) {
      List expanded = new ArrayList();
      replaceMult((Statement) toReplace, f, o2n, expanded);

      // A single, identical result means the nested statement was left as it was.
      if (expanded.size() == 1 && toReplace == expanded.get(0)) {
        result.add(st);
        return;
      }
      replacements = expanded;
    } else {
      Object mapped = o2n.get(toReplace);

      if (mapped == null) {
        // No mapping: keep the same statement.
        result.add(st);
        return;
      }

      if (mapped instanceof Collection) {
        replacements = (Collection) mapped;
      } else {
        replacements = new ArrayList();
        replacements.add(mapped);
      }
    }

    for (Object replacement : replacements) {
      Statement rewritten;
      if (position == 0) {
        rewritten = f.createStatement((Resource) replacement, st.predicate(), st.object());
      } else if (position == 1) {
        rewritten = f.createStatement(st.subject(), (Resource) replacement, st.object());
      } else if (position == 2) {
        rewritten = f.createStatement(st.subject(), st.predicate(), (RDFNode) replacement);
      } else {
        rewritten = null;
      }
      result.add(rewritten);
    }
  }
Beispiel #6
0
  /**
   * Creates a node through {@code factory}, passing {@code constants} followed by one argument
   * node per entry of the factory's execution signature.
   *
   * <p>When the node class is assignable to {@code ChildrenNode} or {@code BuiltinNode}, the
   * argument nodes are passed as a single array element; otherwise each one is passed as its own
   * element.
   */
  static <E extends ValueNode> E createNode(NodeFactory<E> factory, Object... constants) {
    ArgumentNode[] argumentNodes = arguments(factory.getExecutionSignature().size());

    List<Object> creationArgs = new ArrayList<>(Arrays.asList(constants));

    boolean takesArgumentArray =
        ChildrenNode.class.isAssignableFrom(factory.getNodeClass())
            || BuiltinNode.class.isAssignableFrom(factory.getNodeClass());
    if (takesArgumentArray) {
      creationArgs.add(argumentNodes);
    } else {
      for (ArgumentNode argumentNode : argumentNodes) {
        creationArgs.add(argumentNode);
      }
    }

    return factory.createNode(creationArgs.toArray());
  }
  /** Expects exactly two features in the factory, both present and confirmed. */
  public void testFeatures() {
    assertEquals("nb features", 2, factory.getFeatures().size());

    Node mainFeature = factory.getFeatures().get("test.main(java.lang.String[])");
    assertNotNull("feature test.main(java.lang.String[]) missing", mainFeature);
    assertTrue("feature test.main(java.lang.String[]) not concrete", mainFeature.isConfirmed());

    Node testFeature = factory.getFeatures().get("test.test()");
    assertNotNull("feature test.test() missing", testFeature);
    assertTrue("feature test.test() not concrete", testFeature.isConfirmed());
  }
  /**
   * Reifies two triples under two distinct blank nodes and checks that each node yields its own
   * triple, that both nodes are reported as bound, and that an unrelated URI node is not.
   */
  public void testRetrieveTriplesByNode() {
    Graph graph = getGraph();
    Node first = NodeFactory.createBlankNode();
    Node second = NodeFactory.createBlankNode();

    ReifierStd.reifyAs(graph, first, triple("x R y"));
    assertEquals("gets correct triple", triple("x R y"), ReifierStd.getTriple(graph, first));

    ReifierStd.reifyAs(graph, second, triple("p S q"));
    assertDiffer("the anon nodes must be distinct", first, second);
    assertEquals("gets correct triple", triple("p S q"), ReifierStd.getTriple(graph, second));

    assertTrue("node is known bound", ReifierStd.hasTriple(graph, second));
    assertTrue("node is known bound", ReifierStd.hasTriple(graph, first));

    Node unrelated = NodeFactory.createURI("any:thing");
    assertFalse("node is known unbound", ReifierStd.hasTriple(graph, unrelated));
  }
Beispiel #9
0
 /**
  * Returns a new set holding one {@code Node}, obtained from {@code NodeFactory.getInstance}, for
  * every UML activity node of {@code uml_activity}.
  */
 public Set<Node> getNodes() {
   Set<Node> wrapped = new HashSet<Node>();
   for (org.eclipse.uml2.uml.ActivityNode umlNode : uml_activity.getNodes()) {
     Node node = NodeFactory.getInstance(umlNode);
     wrapped.add(node);
   }
   return wrapped;
 }
  /**
   * Adds a dependency from each of a, a.A and a.A.a to each of b, b.B and b.B.b, runs a
   * {@code LinkMinimizer} over all packages, and expects only a.A.a --> b.B.b to remain.
   */
  public void testFeatureFeature() {
    // Full 3 x 3 set of dependencies between the two hierarchies.
    a.addDependency(b);
    a.addDependency(b_B);
    a.addDependency(b_B_b);
    a_A.addDependency(b);
    a_A.addDependency(b_B);
    a_A.addDependency(b_B_b);
    a_A_a.addDependency(b);
    a_A_a.addDependency(b_B);
    a_A_a.addDependency(b_B_b);

    Visitor minimizer = new LinkMinimizer();
    minimizer.traverseNodes(factory.getPackages().values());

    // Source side: only the feature keeps an outbound link.
    assertEquals("a outbound", 0, a.getOutboundDependencies().size());
    assertEquals("a inbound", 0, a.getInboundDependencies().size());
    assertEquals("a_A outbound", 0, a_A.getOutboundDependencies().size());
    assertEquals("a_A inbound", 0, a_A.getInboundDependencies().size());
    assertEquals("a_A_a outbound", 1, a_A_a.getOutboundDependencies().size());
    assertTrue("Missing a.A.a --> b.B.b", a_A_a.getOutboundDependencies().contains(b_B_b));
    assertEquals("a_A_a inbound", 0, a_A_a.getInboundDependencies().size());

    // Target side: only the feature keeps an inbound link.
    assertEquals("b outbound", 0, b.getOutboundDependencies().size());
    assertEquals("b inbound", 0, b.getInboundDependencies().size());
    assertEquals("b_B outbound", 0, b_B.getOutboundDependencies().size());
    assertEquals("b_B inbound", 0, b_B.getInboundDependencies().size());
    assertEquals("b_B_b outbound", 0, b_B_b.getOutboundDependencies().size());
    assertEquals("b_B_b inbound", 1, b_B_b.getInboundDependencies().size());
    assertTrue("Missing b.B.b <-- a.A.a", b_B_b.getInboundDependencies().contains(a_A_a));
  }
Beispiel #11
0
 /**
  * Converts each entry of {@code nodes} with {@code mFactory.makeNode} and forwards the resulting
  * array, together with {@code before}, to {@code mCb.addNodes}.
  */
 public void addNodes(int[] nodes, int before) {
   Prop[] props = new Prop[nodes.length];
   int next = 0;
   for (int node : nodes) {
     props[next++] = mFactory.makeNode(node);
   }
   mCb.addNodes(props, before);
 }
  /**
   * Creates the Rete node for {@code recipe} and connects it to its parents.
   *
   * <p>Two paths exist. For an {@code IndexerRecipe} the parents are ensured first and the indexer
   * is created directly on top of the node of the first parent recipe trace. For every other recipe
   * the node is created and (optionally) registered before its parents are ensured, and only then
   * connected.
   *
   * <p>The created node is recorded in {@code getNodesByRecipe()} and its recipe added to
   * {@code sameClassRecipes} only when {@code Options.nodeSharingOption} allows it: anything but
   * {@code NEVER} for indexers, exactly {@code ALL} for other nodes.
   *
   * @param recipeTrace trace of the recipe being instantiated; also used to reach its parents
   * @param recipe recipe to instantiate
   * @param sameClassRecipes collection that receives {@code recipe} when the node is recorded
   * @return the newly created node
   */
  private Node instantiateNodeForRecipe(
      RecipeTraceInfo recipeTrace,
      final ReteNodeRecipe recipe,
      Collection<ReteNodeRecipe> sameClassRecipes) {
    if (recipe instanceof IndexerRecipe) {

      // INSTANTIATE AND HOOK UP
      // (cannot delay hooking up, because parent determines indexer implementation)
      ensureParents(recipeTrace);
      // Only the first parent trace is consulted here.
      final ReteNodeRecipe parentRecipe =
          recipeTrace.getParentRecipeTraces().iterator().next().getRecipe();
      final Indexer result =
          nodeFactory.createIndexer(
              reteContainer,
              (IndexerRecipe) recipe,
              asSupplier(
                  (Address<? extends Supplier>)
                      reteContainer.network.getExistingNodeByRecipe(parentRecipe)),
              recipeTrace);

      // REMEMBER
      if (Options.nodeSharingOption != Options.NodeSharingOption.NEVER) {
        getNodesByRecipe().put(recipe, reteContainer.makeAddress(result));
        sameClassRecipes.add(recipe);
      }

      return result;
    } else {

      // INSTANTIATE
      Node result = nodeFactory.createNode(reteContainer, recipe, recipeTrace);

      // REMEMBER
      if (Options.nodeSharingOption == Options.NodeSharingOption.ALL) {
        getNodesByRecipe().put(recipe, reteContainer.makeAddress(result));
        sameClassRecipes.add(recipe);
      }

      // HOOK UP
      // (recursion-tolerant due to this delayed order of initialization)
      ensureParents(recipeTrace);
      // Input recipes are wired through the input connector; all others to their parents.
      if (recipe instanceof InputRecipe) inputConnector.connectInput((InputRecipe) recipe, result);
      else connectionFactory.connectToParents(recipeTrace, result);

      return result;
    }
  }
  /**
   * Expects exactly three classes in the factory: {@code test} confirmed, and
   * {@code java.io.PrintStream} and {@code java.util.Set} present but not confirmed.
   */
  public void testClasses() {
    assertEquals("nb classes", 3, factory.getClasses().size());

    Node testClass = factory.getClasses().get("test");
    assertNotNull("class test missing", testClass);
    assertTrue("class test not concrete", testClass.isConfirmed());

    Node printStreamClass = factory.getClasses().get("java.io.PrintStream");
    assertNotNull("class java.io.PrintStream missing", printStreamClass);
    assertFalse("class java.io.PrintStream is concrete", printStreamClass.isConfirmed());

    Node setClass = factory.getClasses().get("java.util.Set");
    assertNotNull("class java.util.Set missing", setClass);
    assertFalse("class java.util.Set is concrete", setClass.isConfirmed());
  }
  /**
   * Expects exactly three packages in the factory: the default package confirmed, and
   * {@code java.io} and {@code java.util} present but not confirmed.
   */
  public void testPackages() {
    assertEquals("nb packages", 3, factory.getPackages().size());

    Node defaultPackage = factory.getPackages().get("");
    assertNotNull("default package missing", defaultPackage);
    assertTrue("default package not concrete", defaultPackage.isConfirmed());

    Node javaIoPackage = factory.getPackages().get("java.io");
    assertNotNull("package java.io missing", javaIoPackage);
    assertFalse("package java.io is concrete", javaIoPackage.isConfirmed());

    Node javaUtilPackage = factory.getPackages().get("java.util");
    assertNotNull("package java.util missing", javaUtilPackage);
    assertFalse("package java.util is concrete", javaUtilPackage.isConfirmed());
  }
Beispiel #15
0
  /**
   * Creates a resource from {@code uri} by splitting it at the index returned by
   * {@code getNamespaceEnd}.
   *
   * <p>The part before the index is used as the namespace only when the index is greater than 1;
   * otherwise the namespace is {@code null}. The local name is always the part from the index on.
   *
   * @param f factory that creates the resource
   * @param uri URI to split
   * @return the resource created by {@code f}
   * @throws ModelException declared by this method; presumably raised by the factory - confirm
   */
  public static Resource createGuessedResource(NodeFactory f, String uri) throws ModelException {

    int split = getNamespaceEnd(uri);

    String namespace = null;
    if (split > 1) {
      namespace = uri.substring(0, split);
    }
    String localName = uri.substring(split);

    return f.createResource(namespace, localName);
  }
 /**
  * Removes strings with counts below the specified minimum. Counts for remaining strings are not
  * affected. Pruning may be interleaved with updating counts in any order.
  *
  * <p>If pruning leaves no root node, the root is reset to a fresh node with count zero.
  *
  * @param minCount Minimum count required to retain a substring count; must be at least
  *     <code>1</code>.
  * @throws IllegalArgumentException If the count is less than <code>1</code>.
  */
 public void prune(int minCount) {
   if (minCount < 1) {
     // The guard accepts 1, so the message must say "at least 1", not "more than 1".
     String msg = "Prune minimum count must be at least 1." + " Found minCount=" + minCount;
     throw new IllegalArgumentException(msg);
   }
   mRootNode = mRootNode.prune(minCount);
   // Node.prune may return null; keep the counter usable with an empty root.
   if (mRootNode == null) mRootNode = NodeFactory.createNode(0);
 }
  /*
   * Generates the initializers (AST) for all values that are shared between states (e.g. all instance factories
   * shared by AddItems overrides).
   *
   * For each shared object a variable named <name> + _FACTORY is declared and initialised with a
   * "new" call of the deferred-instance type. The call receives a getter selector for the
   * object's factory function and, for transient models only, a second one for its reset function.
   */
  private StatementListNode genSharedFactoriesAST(
      StatesModel model, NodeFactory nodeFactory, StatementListNode statementList) {
    Map<String, SharedObject> shared = model.sharedObjects;
    StatementListNode statements = statementList;

    for (String key : shared.keySet()) {
      SharedObject symbol = shared.get(key);

      String varName = ((String) symbol.name + _FACTORY).intern();
      String typeName = NameFormatter.retrieveClassName(DEFERREDINSTANCEFROMFUNCTION);
      String factory = symbol.name + (symbol.model.isDeclared() ? _I : _C);
      String resetFunc = symbol.name + _R;

      // First constructor argument: the factory function.
      MemberExpressionNode factoryGetter =
          AbstractSyntaxTreeUtil.generateGetterSelector(nodeFactory, factory, true);
      ArgumentListNode constructorArgs = nodeFactory.argumentList(null, factoryGetter);

      // Second constructor argument, transient models only: the reset function.
      if (symbol.model.getIsTransient()) {
        MemberExpressionNode resetGetter =
            AbstractSyntaxTreeUtil.generateGetterSelector(nodeFactory, resetFunc, true);
        constructorArgs = nodeFactory.argumentList(constructorArgs, resetGetter);
      }

      QualifiedIdentifierNode typeId =
          AbstractSyntaxTreeUtil.generateQualifiedIdentifier(
              nodeFactory, standardDefs.getCorePackage(), typeName, false);

      CallExpressionNode newCall =
          (CallExpressionNode) nodeFactory.callExpression(typeId, constructorArgs);
      newCall.is_new = true;
      newCall.setRValue(false);

      MemberExpressionNode initializer = nodeFactory.memberExpression(null, newCall);

      VariableDefinitionNode declaration =
          AbstractSyntaxTreeUtil.generateVariable(
              nodeFactory, varName, typeName, false, initializer);
      statements = nodeFactory.statementList(statements, declaration);
    }

    return statements;
  }
Beispiel #18
0
  /**
   * Flatten complex expressions within the AST.
   *
   * <p>Called when the visitor leaves node {@code n}. The handling depends on the node kind:
   *
   * <ul>
   *   <li>the node equal to {@code noFlatten} is returned untouched and {@code noFlatten} is
   *       cleared;
   *   <li>a {@code Block} takes the statement list popped from the front of {@code stack} as its
   *       new statements;
   *   <li>a {@code Stmt} other than a {@code LocalDecl} is appended to the list at the front of
   *       {@code stack};
   *   <li>an {@code Expr} that is not a {@code Lit}, {@code Special}, {@code Local} or
   *       {@code Assign} is replaced by a reference to a new final local, whose declaration
   *       (initialised with the expression) is appended to the list at the front of {@code stack};
   *   <li>everything else, including {@code Assign} and {@code LocalDecl}, is returned as is.
   * </ul>
   *
   * @param old the node before its children were visited (not used here)
   * @param n the node after its children were visited
   * @param v the visitor (not used here)
   * @return the node that takes the place of {@code n}
   */
  public Node leave(Node old, Node n, NodeVisitor v) {
    // One-shot exemption: this node must stay as it is.
    if (n == noFlatten) {
      noFlatten = null;
      return n;
    }

    if (n instanceof Block) {
      // The list at the front of the stack holds the flattened statements of this block.
      List l = (List) stack.removeFirst();
      return ((Block) n).statements(l);
    } else if (n instanceof Stmt && !(n instanceof LocalDecl)) {
      // Record the statement in the list at the front of the stack.
      List l = (List) stack.getFirst();
      l.add(n);
      return n;
    } else if (n instanceof Expr
        && !(n instanceof Lit)
        && !(n instanceof Special)
        && !(n instanceof Local)) {

      Expr e = (Expr) n;

      // Assignments are kept in place rather than hoisted into a temp.
      if (e instanceof Assign) {
        return n;
      }

      // create a local temp, initialized to the value of the complex
      // expression

      String name = newID();
      LocalDecl def =
          nf.LocalDecl(
              e.position(), Flags.FINAL, nf.CanonicalTypeNode(e.position(), e.type()), name, e);
      def = def.localInstance(ts.localInstance(e.position(), Flags.FINAL, e.type(), name));

      // The declaration goes into the statement list at the front of the stack.
      List l = (List) stack.getFirst();
      l.add(def);

      // return the local temp instead of the complex expression
      Local use = nf.Local(e.position(), name);
      use = (Local) use.type(e.type());
      use = use.localInstance(ts.localInstance(e.position(), Flags.FINAL, e.type(), name));
      return use;
    }

    return n;
  }
Beispiel #19
0
 /**
  * Parses one term followed by any number of {@code Lexer.OR}-joined terms, leaving the resulting
  * tree in {@code root}. Each OR node takes the tree built so far as its left child, so the chain
  * nests to the left.
  */
 @SuppressWarnings("unchecked")
 private void expression() {
   term();
   while (token == Lexer.OR) {
     NonTerminal disjunction = NodeFactory.createNonTerminal(token);
     disjunction.setLeft(root);
     // NOTE(review): the OR token is not advanced here; presumably done further down - confirm.
     term();
     disjunction.setRight(root);
     root = disjunction;
   }
 }
Beispiel #20
0
 /**
  * Parses one factor followed by any number of {@code Lexer.AND}-joined factors, leaving the
  * resulting tree in {@code root}. Each AND node takes the tree built so far as its left child, so
  * the chain nests to the left.
  */
 @SuppressWarnings("unchecked")
 private void term() {
   factor();
   while (token == Lexer.AND) {
     NonTerminal conjunction = NodeFactory.createNonTerminal(token);
     conjunction.setLeft(root);
     // NOTE(review): the AND token is not advanced here; presumably done in factor() - confirm.
     factor();
     conjunction.setRight(root);
     root = conjunction;
   }
 }
Beispiel #21
0
  /**
   * Returns a clone of {@code s} from which every element rejected by {@code cmp} has been removed.
   * The input set itself is not modified.
   *
   * @param cmp predicate node, invoked with a one-element argument array and a {@code null} second
   *     argument; an element is kept when the result is {@code true}
   * @param s set to filter
   * @return the filtered clone
   */
  public static TreeSet<Object> filter(NodeFactory<Boolean> cmp, TreeSet<Object> s) {
    TreeSet<Object> kept = (TreeSet<Object>) s.clone();

    for (Iterator<Object> it = kept.iterator(); it.hasNext(); ) {
      boolean accepted = cmp.invoke(new Object[] {it.next()}, null);
      if (!accepted) {
        it.remove();
      }
    }

    return kept;
  }
Beispiel #22
0
  /**
   * Moves the components of {@code st} that live in namespace {@code o} to namespace {@code n}.
   *
   * <p>The object is examined first (only when it is a resource that is not itself a statement),
   * then the subject, then the predicate. A component listed in {@code resourcesToIgnore} is left
   * alone. Every replacement made is also recorded in {@code o2n} when that map is not
   * {@code null}.
   *
   * @return a new statement when at least one component was replaced, otherwise {@code st} itself
   */
  public static Statement replaceNamespace(
      Statement st, String o, String n, NodeFactory f, Map o2n, Set resourcesToIgnore)
      throws ModelException {

    Resource subject = st.subject();
    Resource predicate = st.predicate();
    RDFNode object = st.object();
    boolean changed = false;

    // Object: plain resources only, nested statements are not renamed here.
    if (object instanceof Resource && !(object instanceof Statement)) {
      Resource objectResource = (Resource) object;
      if (o.equals(objectResource.getNamespace())
          && (resourcesToIgnore == null || !resourcesToIgnore.contains(object))) {
        Resource moved = f.createResource(n, objectResource.getLocalName());
        if (o2n != null) {
          o2n.put(object, moved);
        }
        object = moved;
        changed = true;
      }
    }

    // Subject.
    if (o.equals(subject.getNamespace())
        && (resourcesToIgnore == null || !resourcesToIgnore.contains(subject))) {
      Resource moved = f.createResource(n, subject.getLocalName());
      if (o2n != null) {
        o2n.put(subject, moved);
      }
      subject = moved;
      changed = true;
    }

    // Predicate.
    if (o.equals(predicate.getNamespace())
        && (resourcesToIgnore == null || !resourcesToIgnore.contains(predicate))) {
      Resource moved = f.createResource(n, predicate.getLocalName());
      if (o2n != null) {
        o2n.put(predicate, moved);
      }
      predicate = moved;
      changed = true;
    }

    if (!changed) {
      return st;
    }
    return f.createStatement(subject, predicate, object);
  }
Beispiel #23
0
 /**
  * Parses a single value (a variable or a number) into {@code root} and advances to the next
  * token.
  *
  * <p>Throws {@code MalformedExpressionException} when the current token is neither a variable nor
  * a number, or when it is a variable whose symbol is not in {@code allowedIdentifiers} (checked
  * only when that collection is not {@code null}).
  */
 private void value() {
   if (token != Lexer.VARIABLE && token != Lexer.NUMBER) {
     throw new MalformedExpressionException(
         String.format("Value instead of <%s> expected.", token));
   }

   root = NodeFactory.createTerminal(token, lexer.getValue());

   boolean restricted = token == Lexer.VARIABLE && allowedIdentifiers != null;
   if (restricted && !allowedIdentifiers.contains(root.getSymbol())) {
     throw new MalformedExpressionException(
         String.format("Unknown identifier '%s'", root.getSymbol()));
   }

   token = lexer.nextToken();
 }
  /**
   * Builds the fixture: a fresh {@code NodeFactory} holding two parallel package / class / feature
   * chains, {@code a}, {@code a.A}, {@code a.A.a()} and {@code b}, {@code b.B}, {@code b.B.b()}.
   */
  protected void setUp() throws Exception {
    factory = new NodeFactory();

    // First chain: package, then class, then feature.
    a = factory.createPackage("a");
    a_A = factory.createClass("a.A");
    a_A_a = factory.createFeature("a.A.a()");

    // Second chain, same shape.
    b = factory.createPackage("b");
    b_B = factory.createClass("b.B");
    b_B_b = factory.createFeature("b.B.b()");
  }
  /*
   * Generate all AST calls to instantiate itemCreationPolicy='immediate' nodes.
   *
   * For every name in model.earlyInitObjects an expression statement is appended that calls
   * GETINSTANCE, without arguments, on the identifier "<name>_factory".
   */
  private StatementListNode genImmediateInitsAST(
      StatesModel model, NodeFactory nodeFactory, StatementListNode statementList) {
    List<String> earlyObjects = model.earlyInitObjects;
    StatementListNode statements = statementList;

    for (String objectName : earlyObjects) {
      // Receiver: a read of the interned "<name>_factory" identifier.
      String factoryName = (objectName + "_factory").intern();
      IdentifierNode factoryId = nodeFactory.identifier(factoryName, false);
      GetExpressionNode factoryRead = nodeFactory.getExpression(factoryId);
      MemberExpressionNode receiver = nodeFactory.memberExpression(null, factoryRead);

      // Selector: a call of GETINSTANCE with no arguments.
      IdentifierNode getInstanceId = nodeFactory.identifier(GETINSTANCE, false);
      CallExpressionNode call =
          (CallExpressionNode) nodeFactory.callExpression(getInstanceId, null);
      call.setRValue(false);

      MemberExpressionNode invocation = nodeFactory.memberExpression(receiver, call);
      ListNode exprList = nodeFactory.list(null, invocation);
      ExpressionStatementNode statement = nodeFactory.expressionStatement(exprList);
      statements = nodeFactory.statementList(statements, statement);
    }

    return statements;
  }
Beispiel #26
0
 /**
  * Parses {@code value <comparison operator> value} and leaves the resulting operator node in
  * {@code root}, with the first value as left child and the second as right child.
  *
  * <p>Throws {@code MalformedExpressionException} when the token after the first value is not one
  * of the six comparison operators.
  */
 @SuppressWarnings("unchecked")
 private void condition() {
   value();

   boolean isComparison =
       token == Lexer.GREATER
           || token == Lexer.GREATEROREQUAL
           || token == Lexer.LESS
           || token == Lexer.LESSOREQUAL
           || token == Lexer.EQUAL
           || token == Lexer.NOTEQUAL;
   if (!isComparison) {
     throw new MalformedExpressionException(
         String.format("Conditional operator instead of <%s> expected.", token));
   }

   NonTerminal comparison = NodeFactory.createNonTerminal(token);
   comparison.setLeft(root);
   token = lexer.nextToken();
   value();
   comparison.setRight(root);
   root = comparison;
 }
Beispiel #27
0
  /**
   * Appends the relationship's additional SQL filter, if it has one, to the end of the join as
   * {@code " AND <filter>"}.
   *
   * <p>Before writing, every identifier of the parsed filter has its entity prefix (the part
   * before the first dot) replaced: by {@code sourceVarName} when it names the source entity, by
   * {@code targetVarName} when it names the target entity. An identifier with any other prefix is
   * left unchanged; an identifier without a dot raises an {@code EJBQLException}.
   */
  private void appendAdditionalJoinCondition(
      SqlWriter writer,
      DatabaseType databaseType,
      final String sourceVarName,
      final String targetVarName) {
    final String sqlFilter = relationship.getSqlFilter();
    if (sqlFilter == null || sqlFilter.length() == 0) {
      return;
    }

    Node filterExpression = NodeFactory.parseExpression(sqlFilter);
    NodeVisitor aliasRewriter =
        new AbstractNodeVisitor() {
          @Override
          public boolean visitIdentifier(Identifier identifier) {
            String path = identifier.getName();
            int dotPos = path.indexOf('.');
            if (dotPos < 0) {
              throw new EJBQLException(
                  "Invalid sqlWhere in relationship: " + relationship + " - " + sqlFilter);
            }

            String entityName = path.substring(0, dotPos);
            String columnSuffix = path.substring(dotPos);

            boolean namesSource =
                entityName.equals(relationship.getSourceEntity().getSystemName())
                    || entityName.equals(relationship.getSourceEntity().getTableName());
            if (namesSource) {
              identifier.setName(sourceVarName + columnSuffix);
              return true;
            }

            boolean namesTarget =
                entityName.equals(relationship.getTargetEntity().getSystemName())
                    || entityName.equals(relationship.getTargetEntity().getTableName());
            if (namesTarget) {
              identifier.setName(targetVarName + columnSuffix);
            }
            return true;
          }
        };

    filterExpression.accept(aliasRewriter);

    writer.write(" AND ");
    filterExpression.toString(writer, databaseType);
  }
Beispiel #28
0
  /**
   * Returns {@code st} with its components replaced according to {@code o2n}.
   *
   * <p>An object or subject that is itself a statement is processed recursively. Any component
   * (including a recursively rewritten subject) that has a non-null entry in {@code o2n} is
   * replaced by the mapped value.
   *
   * @param st statement to rewrite
   * @param f factory used to create the rewritten statement
   * @param o2n map from old nodes to their replacements
   * @return a new statement when at least one component changed, otherwise {@code st} itself
   * @throws ModelException declared by this method; presumably raised by the factory - confirm
   */
  public static Statement replaceResources(Statement st, NodeFactory f, Map o2n)
      throws ModelException {

    boolean replaced = false;
    Resource subj = st.subject();
    Resource pred = st.predicate();
    RDFNode obj = st.object();

    Object n = null;

    if (obj instanceof Statement) {

      n = obj;
      obj = replaceResources((Statement) obj, f, o2n);
      // The recursive call returns the same instance when nothing changed.
      replaced = n != obj;

    } else if ((n = o2n.get(obj)) != null) {
      replaced = true;
      obj = (RDFNode) n;
    }

    if (subj instanceof Statement) {

      n = subj;
      subj = replaceResources((Statement) subj, f, o2n);
      // Accumulate instead of assigning: an unchanged nested subject must not discard an
      // object replacement recorded above.
      replaced |= (n != subj);
    }
    if ((n = o2n.get(subj)) != null) {
      replaced = true;
      subj = (Resource) n;
    }

    if ((n = o2n.get(pred)) != null) {
      replaced = true;
      pred = (Resource) n;
    }
    return replaced ? f.createStatement(subj, pred, obj) : st;
  }
/**
 * A <code>TrieCharSeqCounter</code> stores counts for substrings of strings. When the counter is
 * constructed, a maximum length is specified, and counts are only stored for strings up to that
 * length. For instance, an n-gram language model needs only counts for strings up to length n.
 *
 * <p>Strings may be added to the counter using {@link #incrementSubstrings(char[],int,int)}, which
 * increments the counts for all substrings of the specified character slice up to the specified
 * maximum length substring. The method {@link #incrementPrefixes(char[],int,int)} increments only
 * the prefixes of the specified string. All substrings are incremented by incrementing prefixes for
 * each suffix. A substring counter may be pruned using {@link #prune(int)}, which removes all
 * substrings with count below the specified threshold.
 *
 * <p>There are a wide range of reporting methods for trie-based counters.
 *
 * <p><i>Implementation Note:</i> The trie counters are a heavily unfolded implementation of a
 * character-based Patricia (PAT) trie.
 *
 * @author Bob Carpenter
 * @version 3.8
 * @since LingPipe2.0
 */
public class TrieCharSeqCounter implements CharSeqCounter {

  // Root of the trie; replaced whenever an update or prune returns a new root node.
  Node mRootNode = NodeFactory.createNode(0);
  // Maximum length of substrings for which counts are stored.
  final int mMaxLength;

  /**
   * Construct a substring counter that stores substrings up to the specified maximum length.
   *
   * @param maxLength Maximum length of substrings stored by this counter.
   * @throws IllegalArgumentException If the maximum length is negative.
   */
  public TrieCharSeqCounter(int maxLength) {
    if (maxLength < 0) {
      String msg = "Max length must be >= 0." + " Found length=" + maxLength;
      throw new IllegalArgumentException(msg);
    }
    mMaxLength = maxLength;
  }

  // following is CharSeqCounter interface w. inherited comments

  public long count(char[] cs, int start, int end) {
    Strings.checkArgsStartEnd(cs, start, end);
    return mRootNode.count(cs, start, end);
  }

  public long extensionCount(char[] cs, int start, int end) {
    Strings.checkArgsStartEnd(cs, start, end);
    return mRootNode.contextCount(cs, start, end);
  }

  public char[] observedCharacters() {
    return com.aliasi.util.Arrays.copy(mRootNode.outcomes(new char[] {}, 0, 0));
  }

  public char[] charactersFollowing(char[] cs, int start, int end) {
    Strings.checkArgsStartEnd(cs, start, end);
    return com.aliasi.util.Arrays.copy(mRootNode.outcomes(cs, start, end));
  }

  public int numCharactersFollowing(char[] cs, int start, int end) {
    Strings.checkArgsStartEnd(cs, start, end);
    return mRootNode.numOutcomes(cs, start, end);
  }

  /**
   * Returns the sum of the total counts reported by {@link #uniqueTotalNGramCount()} over all
   * n-gram lengths.
   *
   * @return The sum of the total counts over all n-gram lengths.
   */
  public long totalSequenceCount() {
    long sum = 0L;
    long[][] uniqueTotals = uniqueTotalNGramCount();
    for (int i = 0; i < uniqueTotals.length; ++i) {
      sum += uniqueTotals[i][1];
    }
    return sum;
  }

  /**
   * Returns the sum of the counts of all character sequences of the specified length.
   *
   * @param length Length of the character sequences counted.
   * @return The sum of the counts of all character sequences of the specified length.
   */
  public long totalSequenceCount(int length) {
    return mRootNode.totalNGramCount(length);
  }

  /**
   * Returns the number of character sequences with non-zero counts, including the empty (zero
   * length) character sequence.
   *
   * @return Number of character sequences with non-zero counts.
   */
  public long uniqueSequenceCount() {
    return mRootNode.size();
  }

  /**
   * Returns the number of character sequences of the specified length with non-zero counts.
   *
   * @param nGramOrder Length of the character sequences counted.
   * @return The number of character sequences of the specified length with non-zero counts.
   */
  public long uniqueSequenceCount(int nGramOrder) {
    return mRootNode.uniqueNGramCount(nGramOrder);
  }

  /**
   * Removes strings with counts below the specified minimum. Counts for remaining strings are not
   * affected. Pruning may be interleaved with updating counts in any order.
   *
   * @param minCount Minimum count required to retain a substring count.
   * @throws IllegalArgumentException If the count is less than <code>1</code>.
   */
  public void prune(int minCount) {
    if (minCount < 1) {
      String msg = "Prune minimum count must be at least 1." + " Found minCount=" + minCount;
      throw new IllegalArgumentException(msg);
    }
    mRootNode = mRootNode.prune(minCount);
    // Pruning may remove the root itself; fall back to a fresh zero-count root.
    if (mRootNode == null) {
      mRootNode = NodeFactory.createNode(0);
    }
  }

  /**
   * Returns an array of frequency counts for n-grams of the specified n-gram order sorted in
   * descending frequency order. This form of result is sometimes called a Zipf plot because of the
   * sorting.
   *
   * <p>Counts are narrowed from <code>long</code> to <code>int</code>, so counts larger than
   * {@link Integer#MAX_VALUE} are not represented faithfully. If there are no n-grams of the
   * specified order, the empty array is returned.
   *
   * @param nGramOrder Order of n-gram counted.
   * @return Array of frequency counts, sorted in decreasing order of rank.
   */
  public int[] nGramFrequencies(int nGramOrder) {
    List<Long> counts = countsList(nGramOrder);
    int[] result = new int[counts.size()];
    for (int i = 0; i < result.length; ++i) {
      result[i] = counts.get(i).intValue();
    }
    java.util.Arrays.sort(result);
    // Reverse the ascending sort in place. The two indices stop when they meet, so every pair
    // is swapped exactly once and an empty array is left untouched.
    for (int lo = 0, hi = result.length - 1; lo < hi; ++lo, --hi) {
      int tmp = result[lo];
      result[lo] = result[hi];
      result[hi] = tmp;
    }
    return result;
  }

  /**
   * Returns the array of unique and total n-gram counts for each n-gram length. The return array is
   * indexed in the first position by n-gram length, and in the second position by <code>0</code>
   * for unique counts and <code>1</code> for total counts. Thus for <code>0&lt;=n&lt;=maxLength()
   * </code>:
   *
   * <blockquote>
   *
   * <code>
   * uniqueTotalNGramCount()[n][0] == uniqueNGramCount(n)
   * </code>
   *
   * </blockquote>
   *
   * and
   *
   * <blockquote>
   *
   * <code>
   * uniqueTotalNGramCount()[n][1] == totalNGramCount(n)
   * </code>
   *
   * </blockquote>
   *
   * If unique and total counts are required for several n-gram depths, this method is much more
   * efficient than calling all of the individual methods separately.
   *
   * @return The array of unique and total n-gram counts for each n-gram length.
   */
  public long[][] uniqueTotalNGramCount() {
    long[][] result = new long[mMaxLength + 1][2];
    mRootNode.addNGramCounts(result, 0);
    return result;
  }

  /**
   * Returns a counter of occurrences of the highest frequency n-grams of a specified n-gram order.
   * The actual n-grams are represented as strings in the result; recall that strings are instances
   * of {@link CharSequence}.
   *
   * <p>The maximum number of results returned must be specified, because the entire set of n-grams
   * is usually too large to return as a counter.
   *
   * @param nGramOrder Order of n-gram to count.
   * @param maxReturn Maximum number of objects returned.
   * @return Counter mapping the highest frequency n-grams to their counts.
   */
  public ObjectToCounterMap<String> topNGrams(int nGramOrder, int maxReturn) {
    NBestCounter counter = new NBestCounter(maxReturn, true);
    mRootNode.topNGrams(counter, new char[nGramOrder], 0, nGramOrder);
    return counter.toObjectToCounter();
  }

  /**
   * Returns the count in the training corpus for the specified sequence of characters. The count
   * returned may have been reduced from the raw counts in training cases by pruning.
   *
   * @param cSeq Character sequence.
   * @return Count of character sequence in model.
   */
  public long count(CharSequence cSeq) {
    return count(com.aliasi.util.Arrays.toArray(cSeq), 0, cSeq.length());
  }

  /**
   * Returns the sum of the counts of all character sequences one character longer than the
   * specified character sequence.
   *
   * @param cSeq Character sequence.
   * @return The sum of the counts of all character sequences one character longer than the
   *     specified character sequence.
   */
  public long extensionCount(CharSequence cSeq) {
    return mRootNode.contextCount(com.aliasi.util.Arrays.toArray(cSeq), 0, cSeq.length());
  }

  /**
   * Increments the count of all substrings of the specified character array slice up to the maximum
   * length specified in the constructor.
   *
   * @param cs Underlying character array.
   * @param start Index of first character in slice.
   * @param end Index of one past last character in slice.
   * @throws IndexOutOfBoundsException If the specified start and one plus end point are not in the
   *     bounds of character sequence.
   */
  public void incrementSubstrings(char[] cs, int start, int end) {
    incrementSubstrings(cs, start, end, 1);
  }

  /**
   * Increments by the specified count all substrings of the specified character array slice up to
   * the maximum length specified in the constructor.
   *
   * @param cs Underlying character array.
   * @param start Index of first character in slice.
   * @param end Index of one past last character in slice.
   * @param count Amount to increment.
   * @throws IndexOutOfBoundsException If the specified start and one plus end point are not in the
   *     bounds of character sequence.
   */
  public void incrementSubstrings(char[] cs, int start, int end, int count) {
    Strings.checkArgsStartEnd(cs, start, end);
    // increment maximal strings and prefixes
    for (int i = start; i + mMaxLength <= end; ++i) {
      incrementPrefixes(cs, i, i + mMaxLength, count);
    }
    // increment short final strings and prefixes
    for (int i = Math.max(start, end - mMaxLength + 1); i < end; ++i) {
      incrementPrefixes(cs, i, end, count);
    }
  }

  /**
   * Increments the count of all substrings of the specified character sequence up to the maximum
   * length specified in the constructor.
   *
   * @param cSeq Character sequence.
   */
  public void incrementSubstrings(CharSequence cSeq) {
    incrementSubstrings(cSeq, 1);
  }

  /**
   * Increments by the specified count all substrings of the specified character sequence up to the
   * maximum length specified in the constructor.
   *
   * @param cSeq Character sequence.
   * @param count Amount to increment.
   */
  public void incrementSubstrings(CharSequence cSeq, int count) {
    incrementSubstrings(com.aliasi.util.Arrays.toArray(cSeq), 0, cSeq.length(), count);
  }

  /**
   * Increments the count of all prefixes of the specified character sequence up to the maximum
   * length specified in the constructor.
   *
   * @param cs Underlying character array.
   * @param start Index of first character in slice.
   * @param end Index of one past last character in slice.
   * @throws IndexOutOfBoundsException If the specified start and one plus end point are not in the
   *     bounds of character sequence.
   */
  public void incrementPrefixes(char[] cs, int start, int end) {
    incrementPrefixes(cs, start, end, 1);
  }

  /**
   * Increments the count of all prefixes of the specified character sequence up to the maximum
   * length specified in the constructor.
   *
   * @param cs Underlying character array.
   * @param start Index of first character in slice.
   * @param end Index of one past last character in slice.
   * @param count Amount to increment.
   * @throws IndexOutOfBoundsException If the specified start and one plus end point are not in the
   *     bounds of character sequence.
   */
  public void incrementPrefixes(char[] cs, int start, int end, int count) {
    Strings.checkArgsStartEnd(cs, start, end);
    mRootNode = mRootNode.increment(cs, start, end, count);
  }

  /**
   * Decrements all of the substrings of the specified character slice by one. This method may be
   * used in conjunction with {@link #incrementSubstrings(char[],int,int)} to implement counts for
   * conditional probability estimates without affecting underlying estimates. For example, the
   * following code:
   *
   * <blockquote>
   *
   * <pre>
   * char[] cs = &quot;abcdefghi&quot;.toCharArray();
   * counter.incrementSubstrings(cs,3,7);
   * counter.decrementSubstrings(cs,3,5);
   * </pre>
   *
   * </blockquote>
   *
   * will increment the substrings of <code>&quot;defg&quot;</code> and then decrement the
   * substrings of <code>&quot;de&quot;</code>, causing the net effect of incrementing the counts of
   * substrings <code>&quot;defg&quot;</code>, <code>&quot;efg&quot;</code>, <code>&quot;fg&quot;
   * </code>, <code>&quot;g&quot;</code>, <code>&quot;def&quot;</code>, <code>&quot;ef&quot;</code>,
   * and <code>&quot;f&quot;</code>. This has the effect of increasing the estimate of <code>g
   * </code> given <code>def</code>, without increasing the estimate of <code>d</code> in an empty
   * context.
   *
   * @param cs Underlying array of characters in slice.
   * @param start Index of first character in slice.
   * @param end Index of one past last character in slice.
   * @throws IndexOutOfBoundsException If the specified start and one plus end point are not in the
   *     bounds of character sequence.
   */
  public void decrementSubstrings(char[] cs, int start, int end) {
    Strings.checkArgsStartEnd(cs, start, end);
    for (int i = start; i < end; ++i) {
      for (int j = i; j <= end; ++j) {
        mRootNode = mRootNode.decrement(cs, i, j);
      }
    }
  }

  /**
   * Returns a string representation of the trie structure of counts underlying this counter.
   *
   * <p><b>Warning:</b> The resulting string will be very large if the number of substrings is
   * large. To avoid blowing out memory, do not call this method for large counters.
   *
   * @return String representation of this counter.
   */
  @Override
  public String toString() {
    return mRootNode.toString();
  }

  // Appends the root node's string form to the specified builder, starting at depth 0.
  void toStringBuilder(StringBuilder sb) {
    mRootNode.toString(sb, 0);
  }

  /**
   * Decrements the unigram count for the specified character. This method is useful for training
   * conditional probabilities, even though it is not powerful enough to do it in full generality.
   *
   * @param c Decrement the unigram count for the specified character.
   */
  public void decrementUnigram(char c) {
    decrementUnigram(c, 1);
  }

  /**
   * Decrements the unigram count by the specified amount for the specified character. This method
   * is useful for training conditional probabilities, even though it is not powerful enough to do
   * it in full generality.
   *
   * @param c Decrement the unigram count for the specified character.
   * @param count Amount to decrement.
   */
  public void decrementUnigram(char c, int count) {
    mRootNode = mRootNode.decrement(new char[] {c}, 0, 1, count);
  }

  // Collects the counts that the root node reports for the specified n-gram order.
  private List<Long> countsList(int nGramOrder) {
    List<Long> accum = new ArrayList<Long>();
    mRootNode.addCounts(accum, nGramOrder);
    return accum;
  }

  /**
   * Writes an encoding of this counter to the specified output stream. It may be read back in using
   * {@link #readFrom(InputStream)}.
   *
   * <p>The output is produced using a {@link BitTrieWriter} wrapped around a {@link BitOutput}
   * wrapped around the specified underlying output stream. First, the bit output is used to
   * delta-code the maximum n-gram plus 1. Then, the trie is encoded as described in {@link
   * BitTrieWriter}. Finally, the bit output is flushed. The underlying output stream is neither
   * flushed nor closed, allowing it to be used for other purposes after this counter is written.
   *
   * <p>If necessary for efficiency, streams should be buffered before being passed to this method.
   *
   * @param out Underlying output stream for writing.
   * @throws IOException If there is an underlying I/O error.
   */
  public void writeTo(OutputStream out) throws IOException {
    BitOutput bitOut = new BitOutput(out);
    bitOut.writeDelta(mMaxLength + 1L);
    TrieWriter writer = new BitTrieWriter(bitOut);
    writeCounter(this, writer, mMaxLength);
    bitOut.flush();
  }

  /**
   * Writes the specified sequence counter to the specified trie writer, restricting output to
   * n-grams not longer than the specified maximum.
   *
   * @param counter Counter to write.
   * @param writer Trie writer to which counter is written.
   * @param maxNGram Maximum length n-gram written.
   * @throws IOException If there is an underlying I/O error.
   */
  public static void writeCounter(CharSeqCounter counter, TrieWriter writer, int maxNGram)
      throws IOException {

    writeCounter(new char[maxNGram], 0, counter, writer);
  }

  /**
   * Reads a trie character sequence counter from the specified input stream.
   *
   * <p>The expected encoding is described in {@link #writeTo(OutputStream)}.
   *
   * <p>If necessary for efficiency, streams should be buffered before being passed to this method.
   *
   * @param in Underlying input stream for reading.
   * @return The counter read from the stream.
   * @throws IOException If there is an underlying I/O error.
   */
  public static TrieCharSeqCounter readFrom(InputStream in) throws IOException {

    BitInput bitIn = new BitInput(in);
    // writeTo stores the maximum n-gram plus 1, so subtract 1 again here.
    int maxNGram = (int) (bitIn.readDelta() - 1L);
    BitTrieReader reader = new BitTrieReader(bitIn);
    return readCounter(reader, maxNGram);
  }

  /**
   * Reads a trie character sequence counter from the specified trie reader, restricting the result
   * to the specified maximum n-gram.
   *
   * @param reader Reader from which to read the trie.
   * @param maxNGram Maximum length n-gram to read.
   * @return The counter read from the reader.
   * @throws IOException If there is an underlying I/O error.
   */
  public static TrieCharSeqCounter readCounter(TrieReader reader, int maxNGram) throws IOException {

    TrieCharSeqCounter counter = new TrieCharSeqCounter(maxNGram);
    counter.mRootNode = readNode(reader, 0, maxNGram);
    return counter;
  }

  // Writes the count of the sequence cs[0..pos), then each following character with its subtree
  // (only while pos is within the buffer length), then the -1 end-of-daughters marker.
  static void writeCounter(char[] cs, int pos, CharSeqCounter counter, TrieWriter writer)
      throws IOException {

    long count = counter.count(cs, 0, pos);
    writer.writeCount(count);
    if (pos < cs.length) { // daughters within n-gram bound
      char[] csNext = counter.charactersFollowing(cs, 0, pos);
      for (int i = 0; i < csNext.length; ++i) {
        writer.writeSymbol(csNext[i]);
        cs[pos] = csNext[i];
        writeCounter(cs, pos + 1, counter, writer);
      }
    }
    writer.writeSymbol(-1L); // end of daughters
  }

  // Consumes one encoded node, including all of its daughters, without building anything.
  private static void skipNode(TrieReader reader) throws IOException {

    reader.readCount();
    while (reader.readSymbol() != -1) {
      skipNode(reader);
    }
  }

  // Reads one node and its daughters. Nodes deeper than maxDepth are consumed from the reader
  // and reported as null. The NodeFactory overload used depends on how many daughters are read.
  private static Node readNode(TrieReader reader, int depth, int maxDepth) throws IOException {

    if (depth > maxDepth) {
      skipNode(reader);
      return null;
    }

    long count = reader.readCount();

    int depthPlus1 = depth + 1;

    long sym1 = reader.readSymbol();

    // 0+ daughters
    if (sym1 == -1L) {
      return NodeFactory.createNode(count);
    }

    // 1+ daughters
    Node node1 = readNode(reader, depthPlus1, maxDepth);
    long sym2 = reader.readSymbol();
    if (sym2 == -1L) {
      return NodeFactory.createNodeFold((char) sym1, node1, count);
    }

    Node node2 = readNode(reader, depthPlus1, maxDepth);
    long sym3 = reader.readSymbol();
    if (sym3 == -1L) {
      return NodeFactory.createNode((char) sym1, node1, (char) sym2, node2, count);
    }

    Node node3 = readNode(reader, depthPlus1, maxDepth);
    long sym4 = reader.readSymbol();
    if (sym4 == -1L) {
      return NodeFactory.createNode(
          (char) sym1, node1, (char) sym2, node2, (char) sym3, node3, count);
    }
    Node node4 = readNode(reader, depthPlus1, maxDepth);

    // 4+ daughters
    StringBuilder cBuf = new StringBuilder();
    cBuf.append((char) sym1);
    cBuf.append((char) sym2);
    cBuf.append((char) sym3);
    cBuf.append((char) sym4);

    List<Node> nodeList = new ArrayList<Node>();
    nodeList.add(node1);
    nodeList.add(node2);
    nodeList.add(node3);
    nodeList.add(node4);

    long sym;

    while ((sym = reader.readSymbol()) != -1L) {
      cBuf.append((char) sym);
      nodeList.add(readNode(reader, depthPlus1, maxDepth));
    }
    Node[] nodes = nodeList.toArray(EMPTY_NODE_ARRAY);
    char[] cs = Strings.toCharArray(cBuf);
    return NodeFactory.createNode(cs, nodes, count); // > 3 daughters
  }

  // Shared zero-length array passed to List.toArray to fix the element type.
  static final Node[] EMPTY_NODE_ARRAY = new Node[0];
}
Beispiel #30
0
 /**
  * Builds the node for the given set of classes by delegating to {@code
  * NodeFactory.getOWLClassNode}.
  *
  * @param entities the classes passed on to the factory
  * @return the node produced by the factory for {@code entities}
  */
 @Override
 protected DefaultNode<OWLClass> getNode(Set<OWLClass> entities) {
   final DefaultNode<OWLClass> classNode = NodeFactory.getOWLClassNode(entities);
   return classNode;
 }