/**
 * Adds one example sentence to the automaton as a path beginning at {@code start}.
 *
 * <p>The sentence is split on single space characters. For each word the existing
 * transition from the current node is followed when {@code nextNode(word)} returns
 * a node; when it returns {@code null}, a new node and a new edge labelled with the
 * word are created and registered. The node reached after the last word is added to
 * {@code finalNodes} and marked accepting.
 *
 * <p>Empty tokens (produced by leading or consecutive spaces, or by an empty
 * sentence) are skipped so that no edge with an empty label is created. A sentence
 * containing no words therefore marks the start node itself as accepting.
 *
 * @param example the sentence to ingest, with words separated by spaces; must not be
 *     {@code null}
 */
public void ingestSentence(String example) {
    String[] words = example.split(" ");
    FSANode curNode = start;
    for (String word : words) {
        // split(" ") yields "" for leading/repeated spaces and for an empty input;
        // such tokens are not words and must not become edge labels.
        if (word.isEmpty()) {
            continue;
        }
        FSANode nextNode = curNode.nextNode(word);
        if (nextNode == null) {
            // No transition for this word yet: extend the automaton by one node.
            nextNode = new FSANode(nodeCount++);
            nodes.add(nextNode);
            FSAEdge edge = new FSAEdge(curNode, nextNode, word);
            // The same edge object is registered on both of its endpoints.
            curNode.addEdge(edge);
            nextNode.addEdge(edge);
        }
        curNode = nextNode;
    }
    finalNodes.add(curNode);
    curNode.setAccepting(true);
}
/**
 * Replaces the given nodes with a single new node and re-points their edges to it.
 *
 * <p>The new node is constructed from the list of the merged nodes' IDs and is
 * accepting if any merged node was accepting. If one of the merged nodes was
 * {@code start}, the new node becomes {@code start}; if any merged node was in
 * {@code finalNodes}, the new node is added to {@code finalNodes}. Each merged node
 * is passed to {@code removeNode} (defined elsewhere; presumably it detaches the
 * node from this automaton's bookkeeping - confirm against its implementation).
 *
 * <p>Edges between two merged nodes become self loops on the new node, at most one
 * per label. The edge rewiring below is order-dependent: outgoing edges are
 * processed before incoming ones, and the incoming pass relies on the self-loop
 * labels recorded by the outgoing pass.
 *
 * <p>Note that the parameter {@code nodes} shadows the field of the same name; the
 * field is accessed as {@code this.nodes}.
 *
 * @param nodes the nodes to merge
 * @return the newly created node that replaces them
 */
private FSANode mergeNodes(Set<FSANode> nodes) {
    List<FSAEdge> incomingEdgesToAdjust = new ArrayList<>();
    List<FSAEdge> outgoingEdgesToAdjust = new ArrayList<>();
    List<String> ids = new ArrayList<>();
    boolean replacedStart = false;
    boolean replacedFinal = false;
    boolean accepting = false;

    // Collect every edge touching the merged nodes and remember which special
    // roles (start / final / accepting) the replacement has to inherit.
    for (FSANode node : nodes) {
        incomingEdgesToAdjust.addAll(node.getIncomingEdges());
        outgoingEdgesToAdjust.addAll(node.getOutgoingEdges());
        ids.add(node.getID());
        if (node == start) {
            replacedStart = true;
            start = null;
        }
        if (finalNodes.contains(node)) {
            replacedFinal = true;
        }
        if (node.isAccepting()) {
            accepting = true;
        }
        removeNode(node);
    }

    FSANode newNode = new FSANode(ids);
    newNode.setAccepting(accepting);

    // Labels for which a self loop on newNode has already been installed.
    Set<String> selfLoopLabels = new HashSet<>();

    // Pass 1: edges that left a merged node now leave newNode.
    for (FSAEdge edge : outgoingEdgesToAdjust) {
        FSANode formerSource = edge.getSource();
        edge.setSource(newNode);
        if (nodes.contains(edge.getTarget())) {
            // self loop
            if (!selfLoopLabels.contains(edge.getLabel())) {
                edge.setTarget(newNode);
                newNode.addEdge(edge);
                selfLoopLabels.add(edge.getLabel());
            } else {
                // A self loop with this label already exists; drop the duplicate
                // from its (merged) target node.
                edge.getTarget().removeEdge(edge);
            }
            continue;
        }
        if (!newNode.getOutgoingEdges().contains(edge)) {
            newNode.addEdge(edge);
            // The target must no longer reference the node that was merged away.
            edge.getTarget().removeEdges(edge.getTarget().edgesFrom(formerSource));
            if (!edge.getTarget().edgesFrom(newNode).contains(edge)) {
                edge.getTarget().addEdge(edge);
            }
            // Sanity check: both endpoints agree on at least one shared edge.
            List<FSAEdge> commonEdges = edge.getTarget().edgesFrom(newNode);
            commonEdges.retainAll(newNode.edgesTo(edge.getTarget()));
            assert commonEdges.size() > 0;
            assert !edge.getTarget().hasEdgeFrom(formerSource);
        } else {
            // newNode already reports this edge as outgoing; remove it from the
            // target when the target sees more than one same-label edge from newNode.
            FSANode target = edge.getTarget();
            if (target
                    .edgesFrom(newNode)
                    .stream()
                    .filter(e -> e.getLabel().equals(edge.getLabel()))
                    .count() > 1) {
                target.removeEdge(edge);
            }
        }
    }

    // Pass 2: edges that entered a merged node now enter newNode.
    for (FSAEdge edge : incomingEdgesToAdjust) {
        FSANode formerTarget = edge.getTarget();
        edge.setTarget(newNode);
        if (edge.getSource().equals(newNode) || nodes.contains(edge.getSource())) {
            // self loop, handled as outgoing above
            assert selfLoopLabels.contains(edge.getLabel());
            continue;
        }
        if (!newNode.getIncomingEdges().contains(edge)) {
            newNode.addEdge(edge);
            // The source must no longer reference the node that was merged away.
            edge.getSource().removeEdges(edge.getSource().edgesTo(formerTarget));
            if (!edge.getSource().edgesTo(newNode).contains(edge)) {
                edge.getSource().addEdge(edge);
            }
            // Sanity check: both endpoints agree on at least one shared edge.
            List<FSAEdge> commonEdges = edge.getSource().edgesTo(newNode);
            commonEdges.retainAll(newNode.edgesFrom(edge.getSource()));
            assert commonEdges.size() > 0;
            assert !edge.getSource().hasEdgeTo(formerTarget);
        } else {
            // newNode already reports this edge as incoming; drop it from the source.
            FSANode source = edge.getSource();
            source.removeEdge(edge);
        }
    }

    assert !this.nodes.contains(newNode);
    this.nodes.add(newNode);
    if (replacedStart) {
        start = newNode;
    }
    if (replacedFinal) {
        finalNodes.add(newNode);
    }
    return newNode;
}