/**
 * Prints the files recorded for one pipeline run.
 *
 * <p>Reads the row keyed by the {@code id} configuration property from the {@code gestore_runs}
 * table, groups the qualifiers of column family {@code d} by file ID and prints every field of
 * each file to standard output. A qualifier ending in {@code _db_timestamp}, {@code _filename}
 * or {@code _regex} contributes that field to the file named by the rest of the qualifier; any
 * other qualifier is itself the file ID and its value is stored under {@code type}.
 *
 * @param args command-line arguments (not read by this method)
 * @return 0 on success; 1 when the {@code id} property is missing or the row holds no data in
 *     family {@code d}
 * @throws Exception if the HBase lookup fails
 */
public int run(String[] args) throws Exception {
    Configuration argConf = getConf();
    String runId = argConf.get("id");
    if (runId == null) {
      System.err.println("Missing required configuration property: id");
      return 1;
    }

    Configuration config = HBaseConfiguration.create();
    // NOTE(review): the admin handle is never used for admin calls here; it is still created in
    // case callers rely on its construction failing when the cluster is unreachable — confirm.
    HBaseAdmin hbAdmin = new HBaseAdmin(config);
    NavigableMap<byte[], byte[]> pipeMap;
    try {
      // NOTE(review): the helper is constructed but never used; kept in case its constructor
      // has side effects — confirm before removing.
      new dbutil(config);

      HTable runTable = new HTable(config, "gestore_runs");
      try {
        Result pipeline = runTable.get(new Get(runId.getBytes()));
        pipeMap = pipeline.getFamilyMap("d".getBytes());
      } finally {
        // release the table's resources even when the lookup throws
        runTable.close();
      }
    } finally {
      hbAdmin.close();
    }

    if (pipeMap == null) {
      System.err.println("No data found for run id: " + runId);
      return 1;
    }

    // file ID -> (field name -> value)
    HashMap<String, HashMap<String, String>> resultMap =
        new HashMap<String, HashMap<String, String>>();
    String[] knownFields = {"db_timestamp", "filename", "regex"};

    // Iterate in key order without draining the Result's own map.
    for (Map.Entry<byte[], byte[]> cell : pipeMap.entrySet()) {
      String qualifier = new String(cell.getKey());
      String cellValue = new String(cell.getValue());
      String field = "type";
      String fileId = qualifier;

      for (String known : knownFields) {
        String suffix = "_" + known;
        if (qualifier.endsWith(suffix)) {
          field = known;
          fileId = qualifier.substring(0, qualifier.length() - suffix.length());
          break;
        }
      }

      HashMap<String, String> fields = resultMap.get(fileId);
      if (fields == null) {
        fields = new HashMap<String, String>();
        resultMap.put(fileId, fields);
      }
      fields.put(field, cellValue);
    }

    for (Map.Entry<String, HashMap<String, String>> file : resultMap.entrySet()) {
      System.out.println("File ID: " + file.getKey());
      for (Map.Entry<String, String> fieldEntry : file.getValue().entrySet()) {
        System.out.format("  %1$-20s  %2$s\n", fieldEntry.getKey(), fieldEntry.getValue());
      }
    }

    return 0;
  }
  /**
   * Abandons an in-progress block: removes it from the ongoing-creates and volume maps and asks
   * {@code delBlockFromDisk} to remove its temporary block file and meta file. Does nothing when
   * the block has no ongoing create.
   *
   * @param b the block to discard
   * @throws IOException declared by the interface; propagated from the helpers called here
   */
  public synchronized void unfinalizeBlock(Block b) throws IOException {
    final ActiveFile pending = ongoingCreates.remove(b);
    if (pending != null) {
      // forget the block in memory before touching the disk
      volumeMap.remove(b);

      File tmpBlockFile = pending.file;
      File tmpMetaFile = getMetaFile(tmpBlockFile, b);
      boolean removed = delBlockFromDisk(tmpBlockFile, tmpMetaFile, b);
      if (removed) {
        DataNode.LOG.warn("Block " + b + " unfinalized and removed. ");
      }
    }
  }
 /**
  * Looks up the file recorded in volumeMap for a block.
  *
  * @param b the block to look up
  * @return the file held by the block's volumeMap entry, or {@code null} when the block has no
  *     entry
  */
 public synchronized File getFile(Block b) {
   DatanodeBlockInfo entry = volumeMap.get(b);
   return (entry == null) ? null : entry.getFile();
 }
  /**
   * Make a copy of the block if this block is linked to an existing snapshot. This ensures that
   * modifying this block does not modify data in any existing snapshots.
   *
   * <p>Only the volumeMap lookup happens under this object's lock; the detach itself is delegated
   * to the block's {@code DatanodeBlockInfo} outside the lock.
   *
   * @param block Block
   * @param numLinks Detach if the number of links exceed this value
   * @throws IOException if the block has no entry in volumeMap, or if the detach fails
   * @return - true if the specified block was detached
   */
  public boolean detachBlock(Block block, int numLinks) throws IOException {
    DatanodeBlockInfo info;
    synchronized (this) {
      info = volumeMap.get(block);
    }
    // The entry may be absent (e.g. removed concurrently); report it instead of hitting an NPE.
    if (info == null) {
      throw new IOException("Block " + block + " does not exist in volumeMap.");
    }
    return info.detachBlock(block, numLinks);
  }
 /**
  * Creates the temporary file for a block on the given volume.
  *
  * @param vol the volume to create the file on; when {@code null}, the volume recorded for the
  *     block in volumeMap is used
  * @param blk the block the temporary file is for
  * @return the file returned by the volume's {@code createTmpFile}
  * @throws IOException if no volume was given and none is recorded for the block, or if the
  *     volume fails to create the file
  */
 synchronized File createTmpFile(FSVolume vol, Block blk) throws IOException {
   if (vol == null) {
     // A block without a volumeMap entry must yield the IOException below, not an NPE.
     DatanodeBlockInfo info = volumeMap.get(blk);
     vol = (info == null) ? null : info.getVolume();
     if (vol == null) {
       throw new IOException("Could not find volume for block " + blk);
     }
   }
   return vol.createTmpFile(blk);
 }
  /**
   * Completes a block write: hands the block's temporary file to its volume via {@code addBlock},
   * records the resulting file in volumeMap and removes the block from the ongoing creates.
   *
   * @param b the block being finalized
   * @throws IOException if the block has no ongoing create, its temporary file is missing, or no
   *     volume is recorded for it
   */
  public synchronized void finalizeBlock(Block b) throws IOException {
    final ActiveFile pending = ongoingCreates.get(b);
    if (pending == null) {
      throw new IOException("Block " + b + " is already finalized.");
    }

    final File tmpFile = pending.file;
    final boolean tmpFilePresent = tmpFile != null && tmpFile.exists();
    if (!tmpFilePresent) {
      throw new IOException("No temporary file " + tmpFile + " for block " + b);
    }

    final FSVolume volume = volumeMap.get(b).getVolume();
    if (volume == null) {
      throw new IOException("No volume for temporary file " + tmpFile + " for block " + b);
    }

    // the volume decides where the finished block file lives
    final File finalizedFile = volume.addBlock(b, tmpFile);
    volumeMap.put(b, new DatanodeBlockInfo(volume, finalizedFile));
    ongoingCreates.remove(b);
  }
    /**
     * Multiplies one matrix entry by the vector entry with the matching column value.
     *
     * <p>The input line is split on commas: field 0 is emitted unchanged as the output key
     * (presumably the row index — confirm against the input format), field 1 selects the vector
     * entry, and field 2 is the matrix value. The product is emitted as the output value.
     *
     * @param key input key supplied by the framework (not used)
     * @param value one comma-separated matrix record
     * @param context used to emit the (field 0, product) pair
     * @throws IOException if the record has fewer than three fields, if no vector entry exists
     *     for field 1, or if writing the output fails
     * @throws InterruptedException propagated from {@code context.write}
     */
    public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      String record = value.toString();
      String[] fields = record.split(",");
      if (fields.length < 3) {
        throw new IOException("Malformed matrix record (expected 3 comma-separated fields): " + record);
      }

      // Look the entry up as an object so a missing one is reported instead of failing on unboxing.
      Double vectorEntry = vector.get(Long.parseLong(fields[1]));
      if (vectorEntry == null) {
        throw new IOException("No vector entry for index " + fields[1] + " in record: " + record);
      }

      double product = Double.parseDouble(fields[2]) * vectorEntry;
      context.write(new Text(fields[0]), new Text(Double.toString(product)));
    }
 /**
  * {@inheritDoc}
  *
  * <p>Fails with an {@link IOException} at the first check that does not hold: the block has a
  * volumeMap entry; a block file can be found (the one returned by {@code getFile}, otherwise the
  * volume's tmp file) and exists; its length equals the block's byte count; the meta file exists
  * and is non-empty; and the generation stamp parsed from the meta file equals the block's.
  */
 public void validateBlockMetadata(Block b) throws IOException {
   final DatanodeBlockInfo blockInfo = volumeMap.get(b);
   if (blockInfo == null) {
     throw new IOException("Block " + b + " does not exist in volumeMap.");
   }

   // Resolve the tmp file first, then prefer the file registered for the block.
   final File tmpFile = blockInfo.getVolume().getTmpFile(b);
   final File registeredFile = getFile(b);
   final File blockFile = (registeredFile != null) ? registeredFile : tmpFile;
   if (blockFile == null) {
     throw new IOException("Block " + b + " does not exist on disk.");
   }
   if (!blockFile.exists()) {
     throw new IOException("Block " + b + " block file " + blockFile + " does not exist on disk.");
   }

   final long expectedLength = b.getNumBytes();
   final long actualLength = blockFile.length();
   if (expectedLength != actualLength) {
     throw new IOException(
         "Block " + b + " length is " + expectedLength
             + " does not match block file length " + actualLength);
   }

   final File metaFile = getMetaFile(blockFile, b);
   if (metaFile == null) {
     throw new IOException("Block " + b + " metafile does not exist.");
   }
   if (!metaFile.exists()) {
     throw new IOException("Block " + b + " metafile " + metaFile + " does not exist on disk.");
   }
   if (metaFile.length() == 0) {
     throw new IOException("Block " + b + " metafile " + metaFile + " is empty.");
   }

   final long stampOnDisk = parseGenerationStamp(blockFile, metaFile);
   final long stampInBlock = b.getGenerationStamp();
   if (stampOnDisk != stampInBlock) {
     throw new IOException(
         "Block " + b + " genstamp is " + stampInBlock
             + " does not match meta file stamp " + stampOnDisk);
   }
 }
    /**
     * Adds an entry to {@code volumeMap} for every block file found in this directory and,
     * recursively, in its child directories.
     *
     * @param volumeMap map to populate; each block file found is added as a {@code Block} key
     *     (file, length, generation stamp) mapped to a {@code DatanodeBlockInfo}
     * @param volume the volume recorded in every {@code DatanodeBlockInfo} that is added
     */
    void getVolumeMap(HashMap<Block, DatanodeBlockInfo> volumeMap, FSVolume volume) {
      if (children != null) {
        for (int i = 0; i < children.length; i++) {
          children[i].getVolumeMap(volumeMap, volume);
        }
      }

      File[] blockFiles = dir.listFiles();
      if (blockFiles == null) {
        // listFiles() returns null when dir is not a directory or an I/O error occurs;
        // there is nothing to scan in that case.
        return;
      }
      for (File candidate : blockFiles) {
        if (Block.isBlockFilename(candidate)) {
          long genStamp = getGenerationStampFromFile(blockFiles, candidate);
          volumeMap.put(
              new Block(candidate, candidate.length(), genStamp),
              new DatanodeBlockInfo(volume, candidate));
        }
      }
    }
    /*
     * Called once at the beginning of the task. Loads the first localized distributed-cache
     * file into the vector map: each line is split on commas, field 0 is the key (parsed as a
     * Long) and field 1 is the value (parsed as a Double).
     *
     * Throws IOException when no cache file is available or the file cannot be read.
     */
    public void setup(Context context) throws IOException, InterruptedException {
      Path[] files = DistributedCache.getLocalCacheFiles(context.getConfiguration());
      if (files == null || files.length == 0) {
        // Without this guard the reader stays null and both the read loop and close() hit an NPE.
        throw new IOException("No distributed cache file available to load the vector from");
      }

      BufferedReader br = new BufferedReader(new FileReader(files[0].toString()));
      try {
        /* reads the cached file into a hashmap */
        String line;
        while ((line = br.readLine()) != null) {
          String[] input = line.split(",");
          vector.put(Long.valueOf(input[0]), Double.valueOf(input[1]));
        }
      } finally {
        br.close();
      }
    }
  /**
   * Returns handles to the block's temporary file and its metadata file.
   *
   * <p>Both files are opened read-only and positioned at the given offsets when those are
   * positive. If anything fails after a file has been opened, the handles opened so far are
   * closed before the exception propagates.
   *
   * @param b the block whose temporary files are opened
   * @param blkOffset offset to seek to in the block file; ignored unless positive
   * @param ckoff offset to seek to in the meta file; ignored unless positive
   * @return streams over the block file and the meta file
   * @throws IOException if the block has no volumeMap entry or a file cannot be opened or
   *     positioned
   */
  public synchronized BlockInputStreams getTmpInputStreams(Block b, long blkOffset, long ckoff)
      throws IOException {

    DatanodeBlockInfo info = volumeMap.get(b);
    if (info == null) {
      throw new IOException("Block " + b + " does not exist in volumeMap.");
    }
    FSVolume v = info.getVolume();
    File blockFile = v.getTmpFile(b);

    RandomAccessFile blockInFile = new RandomAccessFile(blockFile, "r");
    RandomAccessFile metaInFile = null;
    boolean opened = false;
    try {
      if (blkOffset > 0) {
        blockInFile.seek(blkOffset);
      }
      File metaFile = getMetaFile(blockFile, b);
      metaInFile = new RandomAccessFile(metaFile, "r");
      if (ckoff > 0) {
        metaInFile.seek(ckoff);
      }
      BlockInputStreams streams =
          new BlockInputStreams(
              new FileInputStream(blockInFile.getFD()), new FileInputStream(metaInFile.getFD()));
      opened = true;
      return streams;
    } finally {
      if (!opened) {
        // Do not leak descriptors on failure; close errors must not mask the original exception.
        if (metaInFile != null) {
          try {
            metaInFile.close();
          } catch (IOException ignored) {
            // best-effort cleanup
          }
        }
        try {
          blockInFile.close();
        } catch (IOException ignored) {
          // best-effort cleanup
        }
      }
    }
  }
 /**
  * Sets the offsets at which the next write will put data in the block's data and checksum
  * streams.
  *
  * @param b the block being written
  * @param streams the block's write streams; both are cast to {@link FileOutputStream}
  * @param dataOffset new position for the data stream; must not exceed the length of the
  *     block's tmp file
  * @param ckOffset new position for the checksum stream
  * @throws IOException if {@code dataOffset} is beyond the tmp file's length, or repositioning
  *     fails
  */
 public void setChannelPosition(Block b, BlockWriteStreams streams, long dataOffset, long ckOffset)
     throws IOException {
   final long tmpFileLength;
   synchronized (this) {
     // only the lookup of the tmp file's length needs the lock
     tmpFileLength = volumeMap.get(b).getVolume().getTmpFile(b).length();
   }

   if (dataOffset > tmpFileLength) {
     throw new IOException(
         "Trying to change block file offset of block " + b
             + " to " + dataOffset
             + " but actual size of file is " + tmpFileLength);
   }

   FileOutputStream dataStream = (FileOutputStream) streams.dataOut;
   dataStream.getChannel().position(dataOffset);

   FileOutputStream checksumStream = (FileOutputStream) streams.checksumOut;
   checksumStream.getChannel().position(ckOffset);
 }
// Example #13
// 0
    /**
     * Counts how often each distinct value occurs for a key and emits one summary line made of,
     * space-separated: the number of distinct values, then the minimum, median, maximum, mean and
     * standard deviation of the per-value occurrence counts.
     *
     * @param key the key whose values are summarized; also the output key
     * @param values the values for this key
     * @param output collector receiving the single summary record
     * @param reporter progress reporter (not used)
     * @throws IOException if collecting the output fails
     */
    public void reduce(
        IntWritable key,
        Iterator<Text> values,
        OutputCollector<IntWritable, Text> output,
        Reporter reporter)
        throws IOException {
      // distinct value -> number of occurrences
      HashMap<String, Integer> occurrences = new HashMap<String, Integer>();
      while (values.hasNext()) {
        String token = values.next().toString();
        Integer seenSoFar = occurrences.get(token);
        occurrences.put(token, seenSoFar == null ? 1 : seenSoFar + 1);
      }

      // the occurrence counts alone, in the map's iteration order
      ArrayList<Integer> counts = new ArrayList<>(occurrences.values());

      String summary =
          occurrences.size()
              + " "
              + Collections.min(counts)
              + " "
              + median(counts)
              + " "
              + Collections.max(counts)
              + " "
              + mean(counts)
              + " "
              + standard_deviation(counts);
      output.collect(key, new Text(summary));
    }
  /**
   * Start writing to a block file. If isRecovery is true and the block pre-exists, then we kill
   * all other threads that might be writing to this block, and then reopen the file.
   *
   * <p>Outline: (1) reject or detach a block that is already valid; (2) under this object's lock,
   * settle which temporary file backs the block and register it in ongoingCreates; (3) outside
   * the lock, wait for any interrupted writer threads to finish; (4) open write streams on the
   * temporary block file and its meta file.
   *
   * @param b the block to write to
   * @param isRecovery whether an existing (valid or in-progress) block may be reopened
   * @return write streams for the temporary block file and its meta file
   * @throws IOException if the block exists and isRecovery is false
   *     (BlockAlreadyExistsException), if moving the existing files into the tmp location
   *     fails, if no temporary file can be determined, or if waiting for the previous writers is
   *     interrupted
   */
  public BlockWriteStreams writeToBlock(Block b, boolean isRecovery) throws IOException {
    //
    // Make sure the block isn't a valid one - we're still creating it!
    //
    if (isValidBlock(b)) {
      if (!isRecovery) {
        throw new BlockAlreadyExistsException(
            "Block " + b + " is valid, and cannot be written to.");
      }
      // If the block was successfully finalized because all packets
      // were successfully processed at the Datanode but the ack for
      // some of the packets were not received by the client. The client
      // re-opens the connection and retries sending those packets.
      // The other reason is that an "append" is occurring to this block.
      detachBlock(b, 1);
    }
    long blockSize = b.getNumBytes();

    //
    // Serialize access to /tmp, and check if file already there.
    //
    File f = null;
    List<Thread> threads = null;
    synchronized (this) {
      //
      // Is it already in the create process?
      //
      ActiveFile activeFile = ongoingCreates.get(b);
      if (activeFile != null) {
        f = activeFile.file;
        threads = activeFile.threads;

        if (!isRecovery) {
          throw new BlockAlreadyExistsException(
              "Block "
                  + b
                  + " has already been started (though not completed), and thus cannot be created.");
        } else {
          // recovery: ask the current writers to stop; they are joined after the lock is released
          for (Thread thread : threads) {
            thread.interrupt();
          }
        }
        ongoingCreates.remove(b);
      }
      FSVolume v = null;
      if (!isRecovery) {
        // brand-new block: pick a volume for it
        v = volumes.getNextVolume(blockSize);
        // create temporary file to hold block in the designated volume
        f = createTmpFile(v, b);
        volumeMap.put(b, new DatanodeBlockInfo(v));
      } else if (f != null) {
        DataNode.LOG.info("Reopen already-open Block for append " + b);
        // create or reuse temporary file to hold block in the
        // designated volume
        v = volumeMap.get(b).getVolume();
        volumeMap.put(b, new DatanodeBlockInfo(v));
      } else {
        // reopening block for appending to it.
        DataNode.LOG.info("Reopen Block for append " + b);
        v = volumeMap.get(b).getVolume();
        f = createTmpFile(v, b);
        File blkfile = getBlockFile(b);
        File oldmeta = getMetaFile(b);
        File newmeta = getMetaFile(f, b);

        // rename meta file to tmp directory
        DataNode.LOG.debug("Renaming " + oldmeta + " to " + newmeta);
        if (!oldmeta.renameTo(newmeta)) {
          throw new IOException(
              "Block "
                  + b
                  + " reopen failed. "
                  + " Unable to move meta file  "
                  + oldmeta
                  + " to tmp dir "
                  + newmeta);
        }

        // rename block file to tmp directory
        DataNode.LOG.debug("Renaming " + blkfile + " to " + f);
        if (!blkfile.renameTo(f)) {
          // first attempt failed: delete the tmp file at the destination and retry once
          if (!f.delete()) {
            throw new IOException(
                "Block " + b + " reopen failed. " + " Unable to remove file " + f);
          }
          if (!blkfile.renameTo(f)) {
            throw new IOException(
                "Block "
                    + b
                    + " reopen failed. "
                    + " Unable to move block file "
                    + blkfile
                    + " to tmp dir "
                    + f);
          }
        }
        volumeMap.put(b, new DatanodeBlockInfo(v));
      }
      if (f == null) {
        DataNode.LOG.warn("Block " + b + " reopen failed " + " Unable to locate tmp file.");
        throw new IOException("Block " + b + " reopen failed " + " Unable to locate tmp file.");
      }
      // register the block as being written, carrying over the previous writers' thread list
      ongoingCreates.put(b, new ActiveFile(f, threads));
    }

    // Wait (outside the lock) for the writers interrupted above to exit.
    try {
      if (threads != null) {
        for (Thread thread : threads) {
          thread.join();
        }
      }
    } catch (InterruptedException e) {
      // NOTE(review): the InterruptedException is not chained as the cause and the thread's
      // interrupt status is not restored here.
      throw new IOException("Recovery waiting for thread interrupted.");
    }

    //
    // Finally, allow a writer to the block file
    // REMIND - mjc - make this a filter stream that enforces a max
    // block size, so clients can't go crazy
    //
    File metafile = getMetaFile(f, b);
    DataNode.LOG.debug("writeTo blockfile is " + f + " of size " + f.length());
    DataNode.LOG.debug("writeTo metafile is " + metafile + " of size " + metafile.length());
    return createBlockWriteStreams(f, metafile);
  }
  /**
   * Try to update an old block to a new block. If there are ongoing create threads running for the
   * old block, the threads will be returned without updating the block.
   *
   * <p>When no writer thread is alive, the update renames the meta file to a tmp name, checks the
   * generation stamp and length, truncates the block file if the new block is shorter, renames
   * the tmp meta file to the new block's meta file name, re-keys both ongoingCreates and
   * volumeMap from the old block to the new one, and finally validates the new block's metadata.
   *
   * @param oldblock the block as currently stored
   * @param newblock the block carrying the new generation stamp and length
   * @return ongoing create threads if there is any. Otherwise, return null.
   * @throws IOException if the block file is not found, a rename fails, the on-disk generation
   *     stamp is newer than the new block's, the new block is longer than the old one, or the
   *     final validation fails
   */
  private synchronized List<Thread> tryUpdateBlock(Block oldblock, Block newblock)
      throws IOException {
    // check ongoing create threads
    final ActiveFile activefile = ongoingCreates.get(oldblock);
    if (activefile != null && !activefile.threads.isEmpty()) {
      // remove dead threads
      for (Iterator<Thread> i = activefile.threads.iterator(); i.hasNext(); ) {
        final Thread t = i.next();
        if (!t.isAlive()) {
          i.remove();
        }
      }

      // return living threads
      if (!activefile.threads.isEmpty()) {
        // hand back a copy so callers do not share the tracked list
        return new ArrayList<Thread>(activefile.threads);
      }
    }

    // No ongoing create threads is alive. Update block.
    File blockFile = findBlockFile(oldblock.getBlockId());
    if (blockFile == null) {
      throw new IOException("Block " + oldblock + " does not exist.");
    }

    File oldMetaFile = findMetaFile(blockFile);
    long oldgs = parseGenerationStamp(blockFile, oldMetaFile);

    // rename meta file to a tmp file
    // NOTE(review): the checks below throw after this rename has already happened, which appears
    // to leave the meta file under its tmp name when they fail — confirm this is intended.
    File tmpMetaFile =
        new File(
            oldMetaFile.getParent(),
            oldMetaFile.getName() + "_tmp" + newblock.getGenerationStamp());
    if (!oldMetaFile.renameTo(tmpMetaFile)) {
      throw new IOException("Cannot rename block meta file to " + tmpMetaFile);
    }

    // update generation stamp: the stamp may stay the same or grow, never go backwards
    if (oldgs > newblock.getGenerationStamp()) {
      throw new IOException(
          "Cannot update block (id="
              + newblock.getBlockId()
              + ") generation stamp from "
              + oldgs
              + " to "
              + newblock.getGenerationStamp());
    }

    // update length: the block may only keep its length or shrink
    if (newblock.getNumBytes() > oldblock.getNumBytes()) {
      throw new IOException(
          "Cannot update block file (="
              + blockFile
              + ") length from "
              + oldblock.getNumBytes()
              + " to "
              + newblock.getNumBytes());
    }
    if (newblock.getNumBytes() < oldblock.getNumBytes()) {
      truncateBlock(blockFile, tmpMetaFile, oldblock.getNumBytes(), newblock.getNumBytes());
    }

    // rename the tmp file to the new meta file (with new generation stamp)
    File newMetaFile = getMetaFile(blockFile, newblock);
    if (!tmpMetaFile.renameTo(newMetaFile)) {
      throw new IOException("Cannot rename tmp meta file to " + newMetaFile);
    }

    // re-key the in-memory maps from the old block to the new block
    updateBlockMap(ongoingCreates, oldblock, newblock);
    updateBlockMap(volumeMap, oldblock, newblock);

    // paranoia! verify that the contents of the stored block
    // matches the block file on disk.
    validateBlockMetadata(newblock);
    return null;
  }
 /**
  * We're informed that a block is no longer valid. We could lazily garbage-collect the block, but
  * why bother? just get rid of it.
  *
  * <p>Each block is handled independently: a failure is logged and remembered, the remaining
  * blocks are still processed, and a single IOException is thrown at the end if any block
  * failed. The in-memory bookkeeping is updated under this object's lock; the file deletions
  * happen outside it.
  *
  * @param invalidBlks the blocks to delete
  * @throws IOException if at least one block could not be deleted
  */
 public void invalidate(Block invalidBlks[]) throws IOException {
   boolean error = false;
   for (int i = 0; i < invalidBlks.length; i++) {
     File f = null;
     FSVolume v;
     synchronized (this) {
       f = getFile(invalidBlks[i]);
       DatanodeBlockInfo dinfo = volumeMap.get(invalidBlks[i]);
       if (dinfo == null) {
         DataNode.LOG.warn(
             "Unexpected error trying to delete block "
                 + invalidBlks[i]
                 + ". BlockInfo not found in volumeMap.");
         error = true;
         continue;
       }
       v = dinfo.getVolume();
       if (f == null) {
         DataNode.LOG.warn(
             "Unexpected error trying to delete block "
                 + invalidBlks[i]
                 + ". Block not found in blockMap."
                 + ((v == null) ? " " : " Block found in volumeMap."));
         error = true;
         continue;
       }
       if (v == null) {
         DataNode.LOG.warn(
             "Unexpected error trying to delete block "
                 + invalidBlks[i]
                 + ". No volume for this block."
                 + " Block found in blockMap. "
                 + f
                 + ".");
         error = true;
         continue;
       }
       File parent = f.getParentFile();
       if (parent == null) {
         DataNode.LOG.warn(
             "Unexpected error trying to delete block "
                 + invalidBlks[i]
                 + ". Parent not found for file "
                 + f
                 + ".");
         error = true;
         continue;
       }
       // all lookups succeeded: update the volume's bookkeeping and forget the block in memory
       v.clearPath(parent);
       volumeMap.remove(invalidBlks[i]);
     }
     // From here on the lock is released; only the files on disk are touched.
     File metaFile = getMetaFile(f, invalidBlks[i]);
     // measure before deleting so the volume's usage can be decremented afterwards
     long blockSize = f.length() + metaFile.length();
     // a meta file that fails to delete only counts as an error if it still exists
     if (!f.delete() || (!metaFile.delete() && metaFile.exists())) {
       DataNode.LOG.warn(
           "Unexpected error trying to delete block " + invalidBlks[i] + " at file " + f);
       error = true;
       continue;
     }
     v.decDfsUsed(blockSize);
     DataNode.LOG.info("Deleting block " + invalidBlks[i] + " file " + f);
     if (f.exists()) {
       //
       // This is a temporary check especially for hadoop-1220.
       // This will go away in the future.
       //
       DataNode.LOG.info("File " + f + " was deleted but still exists!");
     }
   }
   if (error) {
     throw new IOException("Error in deleting blocks.");
   }
 }