/**
   * Builds the three weighted Yale-format arrays (per-node first-index offsets, neighbor ids, and
   * neighbor weights) describing the neighbors of every node in the given graph.
   *
   * <p>Every edge is recorded in both directions, and the weights of repeated edges between the
   * same pair of nodes are summed. When the graph is not a {@code DirectedWeightedNodeEdgeGraph},
   * each edge counts with weight 1.0. A self loop that is kept is added to its own entry twice
   * (once per direction).
   *
   * @param graph The graph whose neighbor structure is captured.
   * @param removeSelfLoops If true, edges from a node to itself are left out of the neighbors.
   */
  public YaleFormatWeightedNeighbors(
      DirectedNodeEdgeGraph<NodeNameType> graph, boolean removeSelfLoops) {
    final int nodeCount = graph.getNumNodes();
    this.neighborsFirstIdx = new IntArrayList(nodeCount + 1);

    // adjacency.get(u).get(v) accumulates the total weight seen between nodes u and v.
    Map<Integer, HashMap<Integer, Double>> adjacency = new HashMap<>(nodeCount);
    for (int node = 0; node < nodeCount; ++node) {
      adjacency.put(node, new HashMap<>());
    }

    // Pass 1: fold every edge into the adjacency tables, once for each direction.
    final boolean weighted = graph instanceof DirectedWeightedNodeEdgeGraph;
    final int edgeCount = graph.getNumEdges();
    for (int edgeId = 0; edgeId < edgeCount; ++edgeId) {
      Pair<Integer, Integer> endpoints = graph.getEdgeEndpointIds(edgeId);
      int src = endpoints.getFirst();
      int dst = endpoints.getSecond();
      if (!removeSelfLoops || src != dst) {
        double weight = 1.0;
        if (weighted) {
          weight = ((DirectedWeightedNodeEdgeGraph) graph).getEdgeWeight(edgeId);
        }

        HashMap<Integer, Double> fromSrc = adjacency.get(src);
        Double forwardSoFar = fromSrc.get(dst);
        fromSrc.put(dst, weight + (forwardSoFar == null ? 0.0 : forwardSoFar));

        // Looked up only after the forward update: for a self loop this is the same table.
        HashMap<Integer, Double> fromDst = adjacency.get(dst);
        Double backwardSoFar = fromDst.get(src);
        fromDst.put(src, weight + (backwardSoFar == null ? 0.0 : backwardSoFar));
      }
    }

    // Pass 2: running total of neighbor counts gives each node's starting offset; one extra
    // entry at the end holds the overall total.
    int offset = 0;
    for (int node = 0; node < nodeCount; ++node) {
      this.neighborsFirstIdx.add(offset);
      offset += adjacency.get(node).size();
    }
    this.neighborsFirstIdx.add(offset);
    final int totalNeighbors = offset;

    // Pre-fill both arrays with placeholders (-1 id, 0 weight) to be overwritten below.
    this.neighbors = new IntArrayList(totalNeighbors);
    this.wNeighbors = new DoubleArrayList(totalNeighbors);
    for (int slot = 0; slot < totalNeighbors; ++slot) {
      this.neighbors.add(-1);
      this.wNeighbors.add(0);
    }

    // Pass 3: write each accumulated (neighbor, weight) pair into the slot that
    // findNextEmptyNeighbor reports for the owning node.
    for (Map.Entry<Integer, HashMap<Integer, Double>> perNode : adjacency.entrySet()) {
      int src = perNode.getKey();
      for (Map.Entry<Integer, Double> link : perNode.getValue().entrySet()) {
        int dst = link.getKey();
        if (!removeSelfLoops || src != dst) {
          double weight = link.getValue();
          int slot = findNextEmptyNeighbor(src);
          this.neighbors.set(slot, dst);
          this.wNeighbors.set(slot, weight);
        }
      }
    }
  }
  /**
   * Builds a StandardDistributionNormalizer by computing the mean and variance of the given
   * collection of values, optionally excluding outliers from both ends of the sorted values.
   *
   * <p>Despite its name, {@code outlierPercent} is a fraction in [0.0, 1.0), not a value on a
   * 0-100 scale. Half of that fraction of the values is dropped from the low end and half from
   * the high end. If the fraction is too small to drop at least one value per side, all values
   * are used. A computed variance that is not positive is replaced with 1.0.
   *
   * @param values The values to use to build the normalizer. Cannot be null or empty.
   * @param outlierPercent The fraction of values to exclude as outliers, in [0.0, 1.0).
   * @return The StandardDistributionNormalizer created from the mean and variance of the
   *     included values.
   * @throws NullPointerException If values is null.
   * @throws IllegalArgumentException If values is empty, or if outlierPercent is NaN or outside
   *     [0.0, 1.0).
   */
  public static StandardDistributionNormalizer learn(
      final Collection<? extends Number> values, double outlierPercent) {
    if (values == null) {
      // Error: Bad values.
      throw new NullPointerException("values cannot be null.");
    }
    // Written as a negated in-range test so that NaN, which fails every comparison, is rejected
    // along with out-of-range values instead of silently meaning "no outliers".
    if (!(outlierPercent >= 0.0 && outlierPercent < 1.0)) {
      // Error: Bad outlier percent.
      throw new IllegalArgumentException("outlierPercent must be [0.0, 1.0)");
    }

    int count = values.size();
    if (count <= 0) {
      // Error: Not enough samples.
      throw new IllegalArgumentException("values cannot be empty.");
    }

    // Figure out the collection to compute the mean and variance on.
    Collection<? extends Number> included = values;
    if (outlierPercent > 0.0) {
      // Discard the given fraction of outliers by removing half of it from each side.
      int numToDiscard = (int) (count * outlierPercent / 2.0);
      if (numToDiscard > 0 && (2 * numToDiscard) < count) {
        // Only pay for the copy and sort when something is actually going to be trimmed.
        final ArrayList<Number> sorted = new ArrayList<Number>(values);
        Collections.sort(sorted, NumberComparator.INSTANCE);
        included = sorted.subList(numToDiscard, count - numToDiscard);
      }
    }

    // Get the new count of values to compute.
    count = included.size();

    // Compute the mean and variance of what is left.
    Pair<Double, Double> result = UnivariateStatisticsUtil.computeMeanAndVariance(included);
    double mean = result.getFirst();
    // Rescale by (n - 1) / n.
    // NOTE(review): presumably converts an unbiased sample variance into the population
    // variance -- confirm against UnivariateStatisticsUtil.computeMeanAndVariance.
    double variance = ((count - 1.0) / (count)) * result.getSecond();
    if (variance <= 0.0) {
      // Fall back to unit variance when the computed variance is zero or negative.
      variance = 1.0;
    }
    return new StandardDistributionNormalizer(mean, variance);
  }
Exemple #3
0
  /**
   * Command-line entry point: reads a graph from the input file named in the parsed arguments,
   * collects every graph-, node- and edge-level value that was requested, and passes the
   * collected rows to {@code print}.
   *
   * <p>Each row is a list of strings whose first element is a label (for example "Node Degrees")
   * followed by one value per graph, node or edge as appropriate. The input format is chosen by
   * the file extension, compared case-insensitively: ".dot" is read as a DOT file (which also
   * fills in node and edge attributes); ".csv", ".el" and ".txt" are read as edge lists.
   *
   * <p>Returns without doing anything when {@code handleArgs} returns null.
   *
   * @param args The raw command-line arguments, parsed by {@code handleArgs}.
   * @throws RuntimeException If the input filename has no extension or an unsupported one.
   */
  public static void main(String[] args) {
    Arguments a = handleArgs(args);
    if (a == null) {
      return;
    }

    // Read in the graph. Only the DOT reader is handed the attribute maps to fill in.
    DirectedNodeEdgeGraph<String> graph;
    Map<String, Map<String, String>> nodeAttrs = new HashMap<>();
    Map<String, Map<String, String>> edgeAttrs = new HashMap<>();
    // A filename without any '.' yields an empty extension, which is reported as unknown below
    // rather than failing inside substring() with an index error.
    int dotIdx = a.inputFilename.lastIndexOf('.');
    String extension = (dotIdx < 0) ? "" : a.inputFilename.substring(dotIdx);
    switch (extension.toLowerCase()) {
      case ".dot":
        graph = GraphFileIo.readDotFile(a.inputFilename, nodeAttrs, edgeAttrs);
        break;
      case ".csv":
      case ".el":
      case ".txt":
        graph = GraphFileIo.readEdgeListFile(a.inputFilename);
        break;
      default:
        throw new RuntimeException("Unknown file extension: " + extension);
    }

    // This object stores a link to the graph and computes/stores metrics as requested
    GraphMetrics<String> metrics = new GraphMetrics<>(graph);
    List<List<String>> output = new ArrayList<>();
    List<String> tmp;

    // ---- Graph-level values ----
    if (a.numNodes || a.graphAll || a.all) {
      tmp = new ArrayList<>();
      tmp.add("Graph NumNodes");
      tmp.add(Integer.toString(metrics.numNodes()));
      output.add(tmp);
    }
    if (a.numEdges || a.graphAll || a.all) {
      tmp = new ArrayList<>();
      tmp.add("Graph NumEdges");
      tmp.add(Integer.toString(metrics.numEdges()));
      output.add(tmp);
    }
    if (a.assortativity || a.graphAll || a.all) {
      tmp = new ArrayList<>();
      tmp.add("Graph Assortativity");
      tmp.add(Double.toString(metrics.degreeAssortativity()));
      output.add(tmp);
    }

    // ---- Node-level values ----
    if (a.nodeDegree
        || a.nodeNumNeighbors
        || a.nodeNumSuccessors
        || a.nodeNumTriangles
        || a.nodeAll
        || a.printAttrs
        || a.all) {
      // Any per-node row is preceded by the names of all nodes, in node-id order
      tmp = new ArrayList<>();
      tmp.add("Node Names");
      for (int i = 0; i < graph.getNumNodes(); ++i) {
        tmp.add(graph.getNode(i));
      }
      output.add(tmp);
    }
    if (a.printAttrs || a.nodeAll || a.all) {
      // One row per attribute name; nothing is emitted when there are no attributes.
      Set<String> attrNames = getAllPossibleAttributes(nodeAttrs);
      for (String attr : attrNames) {
        tmp = new ArrayList<>();
        tmp.add(attr);
        for (int i = 0; i < graph.getNumNodes(); ++i) {
          // A node with no attribute map at all gets an empty cell, just like a node that
          // merely lacks this particular attribute.
          Map<String, String> attrsOfNode = nodeAttrs.get(graph.getNode(i));
          String v = (attrsOfNode == null) ? null : attrsOfNode.get(attr);
          tmp.add((v == null) ? "" : v);
        }
        output.add(tmp);
      }
    }
    if (a.nodeDegree || a.nodeAll || a.all) {
      tmp = new ArrayList<>();
      tmp.add("Node Degrees");
      for (int i = 0; i < graph.getNumNodes(); ++i) {
        tmp.add(Integer.toString(metrics.degree(i)));
      }
      output.add(tmp);
    }
    if (a.nodeNumNeighbors || a.nodeAll || a.all) {
      tmp = new ArrayList<>();
      tmp.add("Node NumNeighbors");
      for (int i = 0; i < graph.getNumNodes(); ++i) {
        tmp.add(Integer.toString(metrics.numNeighbors(i)));
      }
      output.add(tmp);
    }
    if (a.nodeNumSuccessors || a.nodeAll || a.all) {
      tmp = new ArrayList<>();
      tmp.add("Node NumSuccessors");
      for (int i = 0; i < graph.getNumNodes(); ++i) {
        tmp.add(Integer.toString(metrics.numSuccessors(i)));
      }
      output.add(tmp);
    }
    if (a.nodeNumTriangles || a.nodeAll || a.all) {
      tmp = new ArrayList<>();
      tmp.add("Node NumTriangles");
      for (int i = 0; i < graph.getNumNodes(); ++i) {
        tmp.add(Integer.toString(metrics.numNodeTriangles(i)));
      }
      output.add(tmp);
    }

    // ---- Edge-level values ----
    if (a.edgeJaccardSimilarity
        || a.edgeNumTriangles
        || a.edgeTriangleDensity
        || a.printAttrs
        || a.edgeAll
        || a.all) {
      // Any per-edge row is preceded by all edges, in edge-id order
      tmp = new ArrayList<>();
      tmp.add("Edge Names");
      for (int i = 0; i < graph.getNumEdges(); ++i) {
        Pair<Integer, Integer> e = graph.getEdgeEndpointIds(i);
        String edge = "(" + graph.getNode(e.getFirst()) + "-" + graph.getNode(e.getSecond()) + ")";
        tmp.add(edge);
      }
      output.add(tmp);
    }
    if (a.printAttrs || a.edgeAll || a.all) {
      // One row per attribute name; nothing is emitted when there are no attributes.
      Set<String> attrNames = getAllPossibleAttributes(edgeAttrs);
      for (String attr : attrNames) {
        tmp = new ArrayList<>();
        tmp.add(attr);
        for (int i = 0; i < graph.getNumEdges(); ++i) {
          Pair<Integer, Integer> ee = graph.getEdgeEndpointIds(i);
          String src = graph.getNode(ee.getFirst());
          String dst = graph.getNode(ee.getSecond());
          // Attributes are looked up under the name GraphFileIo builds for this endpoint pair
          String edge = GraphFileIo.toEdgeName(src, dst);

          // An edge with no attribute map at all gets an empty cell instead of an NPE.
          Map<String, String> attrsOfEdge = edgeAttrs.get(edge);
          String v = (attrsOfEdge == null) ? null : attrsOfEdge.get(attr);
          tmp.add((v == null) ? "" : v);
        }
        output.add(tmp);
      }
    }
    if (a.edgeJaccardSimilarity || a.edgeAll || a.all) {
      tmp = new ArrayList<>();
      tmp.add("Edge JaccardSimilarity");
      for (int i = 0; i < graph.getNumEdges(); ++i) {
        tmp.add(Double.toString(metrics.getEdgeJaccardSimilarity(i)));
      }
      output.add(tmp);
    }
    if (a.edgeNumTriangles || a.edgeAll || a.all) {
      tmp = new ArrayList<>();
      tmp.add("Edge NumTriangles");
      for (int i = 0; i < graph.getNumEdges(); ++i) {
        tmp.add(Integer.toString(metrics.numEdgeTriangles(i)));
      }
      output.add(tmp);
    }
    if (a.edgeTriangleDensity || a.edgeAll || a.all) {
      tmp = new ArrayList<>();
      tmp.add("Edge TriangleDensity");
      for (int i = 0; i < graph.getNumEdges(); ++i) {
        tmp.add(Double.toString(metrics.getPerEdgeTriangleDensity(i)));
      }
      output.add(tmp);
    }

    print(output, a.printCsv);
  }