コード例 #1
0
ファイル: HW2_Q4.java プロジェクト: pscuderi/academic-samples
    protected static double Distance(List<Double> p1, List<Double> p2) {
      double sumOfSquaredDifferences = 0.0;

      for (int i = 0; i < p1.size(); i++) {
        sumOfSquaredDifferences += Math.pow(p1.get(i) - p2.get(i), 2.0);
      }

      return Math.pow(sumOfSquaredDifferences, 0.5);
    }
コード例 #2
0
  /**
   * Generate the list of files and make them into FileSplits. This needs to be copied to insert a
   * filter on acceptable data
   */
  @Override
  public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);
    long desiredMappers =
        job.getConfiguration().getLong("org.systemsbiology.jxtandem.DesiredXMLInputMappers", 0);

    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> fileStatuses = listStatus(job);
    boolean forceNumberMappers = fileStatuses.size() == 1;
    for (FileStatus file : fileStatuses) {
      Path path = file.getPath();
      if (!isPathAcceptable(path)) // filter acceptable data
      continue;
      FileSystem fs = path.getFileSystem(job.getConfiguration());
      long length = file.getLen();
      BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
      if ((length != 0) && isSplitable(job, path)) {
        long blockSize = file.getBlockSize();
        // use desired mappers to force more splits
        if (forceNumberMappers && desiredMappers > 0)
          maxSize = Math.min(maxSize, (length / desiredMappers));

        long splitSize = computeSplitSize(blockSize, minSize, maxSize);

        long bytesRemaining = length;
        while (withinSlop(splitSize, bytesRemaining)) {
          int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
          splits.add(
              new FileSplit(
                  path, length - bytesRemaining, splitSize, blkLocations[blkIndex].getHosts()));
          bytesRemaining -= splitSize;
        }

        if (bytesRemaining != 0) {
          splits.add(
              new FileSplit(
                  path,
                  length - bytesRemaining,
                  bytesRemaining,
                  blkLocations[blkLocations.length - 1].getHosts()));
        }
      } else if (length != 0) {
        splits.add(new FileSplit(path, 0, length, blkLocations[0].getHosts()));
      } else {
        // Create empty hosts array for zero length files
        splits.add(new FileSplit(path, 0, length, new String[0]));
      }
    }
    System.out.println("Total # of splits: " + splits.size());
    //     LOG.debug("Total # of splits: " + splits.size());
    return splits;
  }
コード例 #3
0
 /** return progress based on the amount of data processed so far. */
 public float getProgress() throws IOException, InterruptedException {
   long subprogress = 0; // bytes processed in current split
   if (null != curReader) {
     // idx is always one past the current subsplit's true index.
     subprogress = (long) (curReader.getProgress() * split.getLength(idx - 1));
   }
   return Math.min(1.0f, (progress + subprogress) / (float) (split.getLength()));
 }
コード例 #4
0
  private List<InputSplit> getSplits(
      Configuration configuration, int numSplits, long totalSizeBytes) throws IOException {
    List<InputSplit> splits = new ArrayList<InputSplit>(numSplits);
    long nBytesPerSplit = (long) Math.ceil(totalSizeBytes * 1.0 / numSplits);

    CopyListingFileStatus srcFileStatus = new CopyListingFileStatus();
    Text srcRelPath = new Text();
    long currentSplitSize = 0;
    long lastSplitStart = 0;
    long lastPosition = 0;

    final Path listingFilePath = getListingFilePath(configuration);

    if (LOG.isDebugEnabled()) {
      LOG.debug(
          "Average bytes per map: "
              + nBytesPerSplit
              + ", Number of maps: "
              + numSplits
              + ", total size: "
              + totalSizeBytes);
    }
    SequenceFile.Reader reader = null;
    try {
      reader = getListingFileReader(configuration);
      while (reader.next(srcRelPath, srcFileStatus)) {
        // If adding the current file would cause the bytes per map to exceed
        // limit. Add the current file to new split
        if (currentSplitSize + srcFileStatus.getLen() > nBytesPerSplit && lastPosition != 0) {
          FileSplit split =
              new FileSplit(listingFilePath, lastSplitStart, lastPosition - lastSplitStart, null);
          if (LOG.isDebugEnabled()) {
            LOG.debug("Creating split : " + split + ", bytes in split: " + currentSplitSize);
          }
          splits.add(split);
          lastSplitStart = lastPosition;
          currentSplitSize = 0;
        }
        currentSplitSize += srcFileStatus.getLen();
        lastPosition = reader.getPosition();
      }
      if (lastPosition > lastSplitStart) {
        FileSplit split =
            new FileSplit(listingFilePath, lastSplitStart, lastPosition - lastSplitStart, null);
        if (LOG.isDebugEnabled()) {
          LOG.debug("Creating split : " + split + ", bytes in split: " + currentSplitSize);
        }
        splits.add(split);
      }

    } finally {
      IOUtils.closeStream(reader);
    }

    return splits;
  }
コード例 #5
0
  public void reduce(LongWritable key, Iterable<Text> values, Context context)
      throws IOException, InterruptedException {
    LongWritable curNodeId = key;
    double previousPRValue = 1;
    double nextPRValue = 0;
    double localResidual = 0;
    String edgeListOfCurNode = "";
    long localResidualTransformed = 0;

    for (Text value : values) {
      String[] inputInfo = value.toString().split("\\s+");

      // incoming pagerank value
      if (inputInfo.length == 1) {
        nextPRValue += Double.parseDouble(inputInfo[0]);
      }
      // current node info
      else if (inputInfo.length == 3) {
        edgeListOfCurNode = inputInfo[2];
        previousPRValue = Double.parseDouble(inputInfo[1]);
      } else if (inputInfo.length == 2) {
        previousPRValue = Double.parseDouble(inputInfo[1]);
      } else {
        System.out.println("ERROR: received unexpected TEXT in length");
      }
    }
    if (previousPRValue == 1) System.out.println("No node info has been received by a reducer");
    // calculate the pagerank value according to the given formula
    nextPRValue = pagerankFormula(nextPRValue);

    // should also iterate sink nodes list, add the evenly splitted value

    // reducer should store the updated node info(NPR) to output directory
    context.write(null, new Text(curNodeId + " " + nextPRValue + " " + edgeListOfCurNode));

    // then compare PPR with NPR
    try {
      localResidual = Math.abs(previousPRValue - nextPRValue) / nextPRValue;
      localResidualTransformed = (long) (localResidual * 10000);
      // System.out.println("Make sure you got the right transformed residual :
      // "+localResidualTransformed);

    } catch (ArithmeticException e) {
      System.out.println("PPR is zero. Check where you get the value!");
    }

    // assume there is a global counter called residualCounter;

    context.getCounter(myCounter.ResidualCounter.RESIDUAL_SUM).increment(localResidualTransformed);
  }
コード例 #6
0
  // v is all nodes within this block B
  // u is all nodes pointing to this set of v
  // some u are inside the block as well, those are in BE
  // some u are outside the block, those are in BC
  // BE = the Edges from Nodes in Block B
  // BC = the Boundary Conditions
  // NPR[v] = Next PageRank value of Node v
  protected double IterateBlockOnce() {
    // used to iterate through the BE list of edges
    ArrayList<String> uList = new ArrayList<String>();
    // npr = current PageRank value of Node v
    double npr = 0.0;
    // r = sum of PR[u]/deg[u] for boundary nodes pointing to v
    double r = 0.0;
    // resErr = the avg residual error for this iteration
    double resErr = 0.0;

    HashMap<String, Double> tempmap = new HashMap<String, Double>();
    for (String v : vList) {
      npr = 0.0f;
      double prevPR = newPR.get(v);

      // calculate newPR using PR data from any BE nodes for this node
      if (BE.containsKey(v)) {
        uList = BE.get(v);
        for (String u : uList) {
          // npr += PR[u] / deg(u);
          NodeData uNode = nodeDataMap.get(u);
          npr += (newPR.get(u) / uNode.getDegrees());
        }
      }

      // add on any PR from nodes outside the block (BC)
      if (BC.containsKey(v)) {
        r = BC.get(v);
        npr += r;
      }

      // NPR[v] = d*NPR[v] + (1-d)/N;
      npr = (dampingFactor * npr) + randomJumpFactor;
      // update the global newPR map
      tempmap.put(v, npr);
      // newPR.put(v, npr);
      // track the sum of the residual errors
      resErr += Math.abs(prevPR - npr) / npr;
    }
    // calculate the average residual error and return it
    newPR = tempmap;
    resErr = resErr / vList.size();
    return resErr;
  }
コード例 #7
0
  protected void reduce(Text key, Iterable<Text> values, Context context)
      throws IOException, InterruptedException {

    Iterator<Text> itr = values.iterator();
    Text input = new Text();
    String[] inputTokens = null;

    // initialize/reset all variables
    Double pageRankOld = 0.0;
    Double residualError = 0.0;

    String output = "";
    Integer maxNode = 0;

    ArrayList<String> temp = new ArrayList<String>();
    Double tempBC = 0.0;
    vList.clear();
    newPR.clear();
    BE.clear();
    BC.clear();
    nodeDataMap.clear();

    while (itr.hasNext()) {
      input = itr.next();
      inputTokens = input.toString().split(" ");
      // if first element is PR, it is the node ID, previous pagerank and outgoing edgelist for this
      // node
      if (inputTokens[0].equals("PR")) {
        String nodeID = inputTokens[1];
        pageRankOld = Double.parseDouble(inputTokens[2]);
        newPR.put(nodeID, pageRankOld);
        NodeData node = new NodeData();
        node.setNodeID(nodeID);
        node.setPageRank(pageRankOld);
        if (inputTokens.length == 4) {
          node.setEdgeList(inputTokens[3]);
          node.setDegrees(inputTokens[3].split(",").length);
        }
        vList.add(nodeID);
        nodeDataMap.put(nodeID, node);
        // keep track of the max nodeID for this block
        if (Integer.parseInt(nodeID) > maxNode) {
          maxNode = Integer.parseInt(nodeID);
        }

        // if BE, it is an in-block edge
      } else if (inputTokens[0].equals("BE")) {

        if (BE.containsKey(inputTokens[2])) {
          // Initialize BC for this v
          temp = BE.get(inputTokens[2]);
        } else {
          temp = new ArrayList<String>();
        }
        temp.add(inputTokens[1]);
        BE.put(inputTokens[2], temp);

        // if BC, it is an incoming node from outside of the block
      } else if (inputTokens[0].equals("BC")) {
        if (BC.containsKey(inputTokens[2])) {
          // Initialize BC for this v
          tempBC = BC.get(inputTokens[2]);
        } else {
          tempBC = 0.0;
        }
        tempBC += Double.parseDouble(inputTokens[3]);
        BC.put(inputTokens[2], tempBC);
      }
    }

    int i = 0;
    do {
      i++;
      residualError = IterateBlockOnce();
      // System.out.println("Block " + key + " pass " + i + " resError:" + residualError);
    } while (residualError > threshold);

    // i < maxIterations &&

    // compute the ultimate residual error for each node in this block
    residualError = 0.0;
    for (String v : vList) {
      NodeData node = nodeDataMap.get(v);
      residualError += Math.abs(node.getPageRank() - newPR.get(v)) / newPR.get(v);
    }
    residualError = residualError / vList.size();
    // System.out.println("Block " + key + " overall resError for iteration: " + residualError);

    // add the residual error to the counter that is tracking the overall sum (must be expressed as
    // a long value)
    long residualAsLong = (long) Math.floor(residualError * PageRankBlock.precision);
    long numberOfIterations = (long) (i);
    context.getCounter(PageRankBlock.ProjectCounters.RESIDUAL_ERROR).increment(residualAsLong);

    context
        .getCounter(PageRankBlock.ProjectCounters.AVERAGE_ITERATIONS)
        .increment(numberOfIterations);

    // output should be
    //	key:nodeID (for this node)
    //	value:<pageRankNew> <degrees> <comma-separated outgoing edgeList>
    for (String v : vList) {
      NodeData node = nodeDataMap.get(v);
      output = newPR.get(v) + " " + node.getDegrees() + " " + node.getEdgeList();
      Text outputText = new Text(output);
      Text outputKey = new Text(v);
      context.write(outputKey, outputText);
      if (v.equals(maxNode.toString())) {
        System.out.println("Block:" + key + " | node:" + v + " | pageRank:" + newPR.get(v));
      }
    }

    cleanup(context);
  }