/**
 * Initializes the three parts of the weighted Yale format for the neighbors of all nodes in the
 * input graph.
 *
 * <p>Edge direction is ignored: every edge (l, r) is recorded both as "r is a neighbor of l" and
 * as "l is a neighbor of r". When several edges connect the same pair of nodes their weights are
 * summed into a single neighbor entry. Edges of a graph that is not a
 * {@code DirectedWeightedNodeEdgeGraph} all get weight 1.0. Because both directions of a self
 * loop land on the same entry, a self loop that is kept contributes its weight twice.
 *
 * <p>After construction, {@code neighborsFirstIdx} holds {@code numNodes + 1} offsets (the start
 * of each node's slice followed by the total neighbor count), and {@code neighbors} /
 * {@code wNeighbors} hold the neighbor ids and the matching accumulated weights. Within one
 * node's slice the neighbors appear in the iteration order of a hash map, so no particular
 * ordering is guaranteed.
 *
 * @param graph The graph whose parameters should be initialized.
 * @param removeSelfLoops If true, self loops won't be in the computed neighbors
 */
public YaleFormatWeightedNeighbors(
    DirectedNodeEdgeGraph<NodeNameType> graph, boolean removeSelfLoops) {
  final int numNodes = graph.getNumNodes();
  final int numEdges = graph.getNumEdges();
  this.neighborsFirstIdx = new IntArrayList(numNodes + 1);

  // Resolve the weighted view once instead of re-testing the graph type for every edge.
  final DirectedWeightedNodeEdgeGraph<?> weighted =
      (graph instanceof DirectedWeightedNodeEdgeGraph)
          ? (DirectedWeightedNodeEdgeGraph<?>) graph
          : null;

  // For every node id: neighbor id -> summed weight of all edges joining the two nodes.
  Map<Integer, Map<Integer, Double>> edges = new HashMap<>(numNodes);
  for (int i = 0; i < numNodes; ++i) {
    edges.put(i, new HashMap<>());
  }
  for (int i = 0; i < numEdges; ++i) {
    Pair<Integer, Integer> edge = graph.getEdgeEndpointIds(i);
    int l = edge.getFirst();
    int r = edge.getSecond();
    if (removeSelfLoops && (l == r)) {
      continue;
    }
    double w = (weighted != null) ? weighted.getEdgeWeight(i) : 1.0;
    // Record the edge in both directions, accumulating onto any existing entry.
    edges.get(l).merge(r, w, Double::sum);
    edges.get(r).merge(l, w, Double::sum);
  }

  // Running offsets: where each node's slice of the neighbors list begins.
  int neighborsSoFar = 0;
  for (int i = 0; i < numNodes; ++i) {
    this.neighborsFirstIdx.add(neighborsSoFar);
    neighborsSoFar += edges.get(i).size();
  }
  // Trailing sentinel so the end of the last node's slice can be read like any other.
  this.neighborsFirstIdx.add(neighborsSoFar);

  // Pre-fill with placeholders (-1 id, 0 weight) that the loop below overwrites; presumably -1
  // is what findNextEmptyNeighbor treats as a free slot.
  this.neighbors = new IntArrayList(neighborsSoFar);
  this.wNeighbors = new DoubleArrayList(neighborsSoFar);
  for (int i = 0; i < neighborsSoFar; ++i) {
    this.neighbors.add(-1);
    this.wNeighbors.add(0);
  }

  // Copy every accumulated (node, neighbor, weight) triple into the flat lists. No self-loop
  // check is needed here: unwanted self loops were never added to the maps above.
  for (Map.Entry<Integer, Map<Integer, Double>> nodeEntry : edges.entrySet()) {
    int l = nodeEntry.getKey();
    for (Map.Entry<Integer, Double> neighborEntry : nodeEntry.getValue().entrySet()) {
      int r = neighborEntry.getKey();
      double w = neighborEntry.getValue();
      int idx = findNextEmptyNeighbor(l);
      this.neighbors.set(idx, r);
      this.wNeighbors.set(idx, w);
    }
  }
}
/** * Builds a StandardDistributionNormalizer by computing the mean and variance of the given * collection of values. It will exclude the given percentage of outliers from the value. * * @param values The values to use to build the normalizer. * @param outlierPercent The percentage of outliers to exclude. * @return The StandardDistributionNormalizer created from the mean and variance of the given * values. */ public static StandardDistributionNormalizer learn( final Collection<? extends Number> values, double outlierPercent) { if (values == null) { // Error: Bad values. throw new NullPointerException("values cannot be null."); } else if (outlierPercent < 0.0 || outlierPercent >= 1.0) { // Error: Bad outlier percent. throw new IllegalArgumentException("outlierPercent must be [0.0, 1.0)"); } int count = values.size(); if (count <= 0) { // Error: Not enough samples. throw new IllegalArgumentException("values cannot be empty."); } // Figure out the collection to compute the mean and variance on. Collection<? extends Number> included = values; if (outlierPercent > 0.0) { // Discard the given percentage of outliers by removing half that // percentage from each side. final ArrayList<Number> sorted = new ArrayList<Number>(values); Collections.sort(sorted, NumberComparator.INSTANCE); int numToDiscard = (int) (count * outlierPercent / 2.0); if (numToDiscard > 0 && (2 * numToDiscard) < count) { included = sorted.subList(numToDiscard, count - numToDiscard); } } // Get the new count of values to compute. count = included.size(); // Compute the mean. Pair<Double, Double> result = UnivariateStatisticsUtil.computeMeanAndVariance(included); double mean = result.getFirst(); double variance = ((count - 1.0) / (count)) * result.getSecond(); if (variance <= 0.0) { variance = 1.0; } return new StandardDistributionNormalizer(mean, variance); }
public static void main(String[] args) { Arguments a = handleArgs(args); if (a == null) { return; } // Read in the graph DirectedNodeEdgeGraph<String> graph; Map<String, Map<String, String>> nodeAttrs = new HashMap<>(); ; Map<String, Map<String, String>> edgeAttrs = new HashMap<>(); String extension = a.inputFilename.substring(a.inputFilename.lastIndexOf(".")); switch (extension.toLowerCase()) { case ".dot": graph = GraphFileIo.readDotFile(a.inputFilename, nodeAttrs, edgeAttrs); break; case ".csv": case ".el": case ".txt": graph = GraphFileIo.readEdgeListFile(a.inputFilename); break; default: throw new RuntimeException("Unknown file extension: " + extension); } // This object stores a link to the graph and computes/stores metrics as requested GraphMetrics<String> metrics = new GraphMetrics<>(graph); List<List<String>> output = new ArrayList<>(); List<String> tmp; // We now go through the various flags and add results to the output if (a.numNodes || a.graphAll || a.all) { tmp = new ArrayList<>(); tmp.add("Graph NumNodes"); tmp.add(Integer.toString(metrics.numNodes())); output.add(tmp); } if (a.numEdges || a.graphAll || a.all) { tmp = new ArrayList<>(); tmp.add("Graph NumEdges"); tmp.add(Integer.toString(metrics.numEdges())); output.add(tmp); } if (a.assortativity || a.graphAll || a.all) { tmp = new ArrayList<>(); tmp.add("Graph Assortativity"); tmp.add(Double.toString(metrics.degreeAssortativity())); output.add(tmp); } if (a.nodeDegree || a.nodeNumNeighbors || a.nodeNumSuccessors || a.nodeNumTriangles || a.nodeAll || a.printAttrs || a.all) { // Print the names of all nodes in order tmp = new ArrayList<>(); tmp.add("Node Names"); for (int i = 0; i < graph.getNumNodes(); ++i) { tmp.add(graph.getNode(i)); } output.add(tmp); } if (a.printAttrs || a.nodeAll || a.all) { Set<String> attrNames = getAllPossibleAttributes(nodeAttrs); if (attrNames.size() > 0) { for (String attr : attrNames) { tmp = new ArrayList<>(); tmp.add(attr); for (int i = 0; i < graph.getNumNodes(); 
++i) { String v = nodeAttrs.get(graph.getNode(i)).get(attr); v = (v == null) ? "" : v; tmp.add(v); } output.add(tmp); } } } if (a.nodeDegree || a.nodeAll || a.all) { tmp = new ArrayList<>(); tmp.add("Node Degrees"); for (int i = 0; i < graph.getNumNodes(); ++i) { tmp.add(Integer.toString(metrics.degree(i))); } output.add(tmp); } if (a.nodeNumNeighbors || a.nodeAll || a.all) { tmp = new ArrayList<>(); tmp.add("Node NumNeighbors"); for (int i = 0; i < graph.getNumNodes(); ++i) { tmp.add(Integer.toString(metrics.numNeighbors(i))); } output.add(tmp); } if (a.nodeNumSuccessors || a.nodeAll || a.all) { tmp = new ArrayList<>(); tmp.add("Node NumSuccessors"); for (int i = 0; i < graph.getNumNodes(); ++i) { tmp.add(Integer.toString(metrics.numSuccessors(i))); } output.add(tmp); } if (a.nodeNumTriangles || a.nodeAll || a.all) { tmp = new ArrayList<>(); tmp.add("Node NumTriangles"); for (int i = 0; i < graph.getNumNodes(); ++i) { tmp.add(Integer.toString(metrics.numNodeTriangles(i))); } output.add(tmp); } if (a.edgeJaccardSimilarity || a.edgeNumTriangles || a.edgeTriangleDensity || a.printAttrs || a.edgeAll || a.all) { // First print all edges in order tmp = new ArrayList<>(); tmp.add("Edge Names"); for (int i = 0; i < graph.getNumEdges(); ++i) { Pair<Integer, Integer> e = graph.getEdgeEndpointIds(i); String edge = "(" + graph.getNode(e.getFirst()) + "-" + graph.getNode(e.getSecond()) + ")"; tmp.add(edge); } output.add(tmp); } if (a.printAttrs || a.edgeAll || a.all) { Set<String> attrNames = getAllPossibleAttributes(edgeAttrs); if (attrNames.size() > 0) { for (String attr : attrNames) { tmp = new ArrayList<>(); tmp.add(attr); for (int i = 0; i < graph.getNumEdges(); ++i) { Pair<Integer, Integer> ee = graph.getEdgeEndpointIds(i); String src = graph.getNode(ee.getFirst()); String dst = graph.getNode(ee.getSecond()); String edge = GraphFileIo.toEdgeName(src, dst); String v = edgeAttrs.get(edge).get(attr); v = (v == null) ? 
"" : v; tmp.add(v); } output.add(tmp); } } } if (a.edgeJaccardSimilarity || a.edgeAll || a.all) { tmp = new ArrayList<>(); tmp.add("Edge JaccardSimilarity"); for (int i = 0; i < graph.getNumEdges(); ++i) { tmp.add(Double.toString(metrics.getEdgeJaccardSimilarity(i))); } output.add(tmp); } if (a.edgeNumTriangles || a.edgeAll || a.all) { tmp = new ArrayList<>(); tmp.add("Edge NumTriangles"); for (int i = 0; i < graph.getNumEdges(); ++i) { tmp.add(Integer.toString(metrics.numEdgeTriangles(i))); } output.add(tmp); } if (a.edgeTriangleDensity || a.edgeAll || a.all) { tmp = new ArrayList<>(); tmp.add("Edge TriangleDensity"); for (int i = 0; i < graph.getNumEdges(); ++i) { tmp.add(Double.toString(metrics.getPerEdgeTriangleDensity(i))); } output.add(tmp); } print(output, a.printCsv); }