Example #1
  private static void StartingJob()
      throws IOException, InterruptedException, ClassNotFoundException {

    conf = new Configuration();
    fs = FileSystem.get(conf);
    conf.setLong("my.vertex.num", num);
    job = Job.getInstance(conf, "Levelized Nested Dissection Starting");

    job.setJarByClass(LevNestDissectJob.class);
    job.setMapperClass(StartVertexMapper.class);
    job.setReducerClass(StartVertexReducer.class);

    in = out.suffix("/" + outPath_count);
    FileInputFormat.addInputPath(job, in);

    out_start = out.suffix("/" + outPath_start);
    if (fs.exists(out_start)) {
      fs.delete(out_start, true);
    }
    FileOutputFormat.setOutputPath(job, out_start);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(VertexWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.waitForCompletion(true);

    depth = depth == 0 ? depth + 1 : depth;
    wasStart = true;
  }
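A side note that applies to most snippets on this page: Path.suffix() glues the given string onto the last component of the path without inserting any separator, which is why the examples prepend "/" or "." explicitly. A minimal self-contained sketch (class name invented for illustration):

import org.apache.hadoop.fs.Path;

public class PathSuffixDemo {
  public static void main(String[] args) {
    Path out = new Path("/data/out");
    // No separator is added: the suffix is appended to the final name as-is.
    System.out.println(out.suffix(".gz"));          // /data/out.gz
    System.out.println(out.suffix("/part-00000"));  // /data/out/part-00000
  }
}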
  private FSDataOutputStream setupOutputFile(Path path) throws IOException {
    fs = path.getFileSystem(CompatibilityUtil.getConfiguration(context));
    inputPath = path;

    // For /a/b/c.lzo, tmpIndexPath = /a/b/c.lzo.index.tmp,
    // and it is moved to realIndexPath = /a/b/c.lzo.index upon completion.
    tmpIndexPath = path.suffix(LzoIndex.LZO_TMP_INDEX_SUFFIX);
    realIndexPath = path.suffix(LzoIndex.LZO_INDEX_SUFFIX);

    // Delete the old index files if they exist.
    fs.delete(tmpIndexPath, false);
    fs.delete(realIndexPath, false);

    return fs.create(tmpIndexPath, false);
  }
Example #3
  /**
   * Creates an output segment file and sets up the output streams to point at it. If the file
   * already exists, retries with a different filename. This is a bit nasty -- after all, {@link
   * FileOutputFormat}'s work directory concept is supposed to prevent filename clashes -- but it
   * looks like Amazon Elastic MapReduce prevents use of per-task work directories if the output of
   * a job is on S3.
   *
   * <p>TODO: Investigate this and find a better solution.
   */
  private void createSegment() throws IOException {
    segmentsAttempted = 0;
    bytesWritten = 0;
    boolean success = false;

    while (!success) {
      Path path =
          workOutputPath.suffix(String.format(extensionFormat, segmentsCreated, segmentsAttempted));
      FileSystem fs = path.getFileSystem(conf);

      try {
        // The o.a.h.mapred OutputFormats overwrite existing files, whereas
        // the o.a.h.mapreduce OutputFormats don't overwrite. Bizarre...
        // Here, overwrite if progress != null, i.e. if using mapred API.
        FSDataOutputStream fsStream =
            (progress == null) ? fs.create(path, false) : fs.create(path, progress);
        byteStream = new CountingOutputStream(new BufferedOutputStream(fsStream));
        dataStream =
            new DataOutputStream(codec == null ? byteStream : codec.createOutputStream(byteStream));
        segmentsCreated++;
        logger.info("Writing to output file: {}", path);
        success = true;

      } catch (IOException e) {
        if (e.getMessage().startsWith("File already exists")) {
          logger.warn("Tried to create file {} but it already exists; retrying.", path);
          segmentsAttempted++; // retry
        } else {
          throw e;
        }
      }
    }
  }
Example #4
 /** Returns the Hdfs size of the given region in bytes. */
 public long getHdfsSize(HRegionInfo info) throws IOException {
   Path tableDir =
       HTableDescriptor.getTableDir(
           FSUtils.getRootDir(hbaseConf_), Bytes.toBytes(hbaseTableName_));
   FileSystem fs = tableDir.getFileSystem(hbaseConf_);
   Path regionDir = tableDir.suffix("/" + info.getEncodedName());
   return fs.getContentSummary(regionDir).getLength();
 }
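A hypothetical companion sketch (the HBaseAdmin lookup is an assumption, not part of the original class): summing getHdfsSize() over all regions of the table gives its total HDFS footprint.

 /** Hypothetical sketch: total HDFS size of the table, built on the helper above. */
 public long getTotalHdfsSize() throws IOException {
   HBaseAdmin admin = new HBaseAdmin(hbaseConf_);
   try {
     long total = 0L;
     // getTableRegions() lists the regions whose directories getHdfsSize() measures.
     for (HRegionInfo region : admin.getTableRegions(Bytes.toBytes(hbaseTableName_))) {
       total += getHdfsSize(region);
     }
     return total;
   } finally {
     admin.close();
   }
 }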
  /** Test compressible {@link GridmixRecord}. */
  @Test
  public void testCompressibleGridmixRecord() throws IOException {
    JobConf conf = new JobConf();
    CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
    CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true);

    FileSystem lfs = FileSystem.getLocal(conf);
    int dataSize = 1024 * 1024 * 10; // 10 MB
    float ratio = 0.357F;

    // define the test's root temp directory
    Path rootTempDir =
        new Path(System.getProperty("test.build.data", "/tmp"))
            .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());

    Path tempDir = new Path(rootTempDir, "TestPossiblyCompressibleGridmixRecord");
    lfs.delete(tempDir, true);

    // define a compressible GridmixRecord
    GridmixRecord record = new GridmixRecord(dataSize, 0);
    record.setCompressibility(true, ratio); // enable compression

    conf.setClass(FileOutputFormat.COMPRESS_CODEC, GzipCodec.class, CompressionCodec.class);
    org.apache.hadoop.mapred.FileOutputFormat.setCompressOutput(conf, true);

    // write the record to a file
    Path recordFile = new Path(tempDir, "record");
    OutputStream outStream =
        CompressionEmulationUtil.getPossiblyCompressedOutputStream(recordFile, conf);
    DataOutputStream out = new DataOutputStream(outStream);
    record.write(out);
    out.close();
    outStream.close();

    // open the compressed stream for reading
    Path actualRecordFile = recordFile.suffix(".gz");
    InputStream in =
        CompressionEmulationUtil.getPossiblyDecompressedInputStream(actualRecordFile, conf, 0);

    // get the compressed file size
    long compressedFileSize = lfs.listStatus(actualRecordFile)[0].getLen();

    GridmixRecord recordRead = new GridmixRecord();
    recordRead.readFields(new DataInputStream(in));

    assertEquals(
        "Record size mismatch in a compressible GridmixRecord", dataSize, recordRead.getSize());
    assertTrue(
        "Failed to generate a compressible GridmixRecord",
        recordRead.getSize() > compressedFileSize);

    // check if the record can generate data with the desired compression ratio
    float seenRatio = ((float) compressedFileSize) / dataSize;
    assertEquals(
        CompressionEmulationUtil.standardizeCompressionRatio(ratio),
        CompressionEmulationUtil.standardizeCompressionRatio(seenRatio),
        1.0D);
  }
Example #6
  private static void ResultJob() throws IOException, InterruptedException, ClassNotFoundException {

    /*depth = 9;
    wasError = true;*/

    conf = new Configuration();
    conf.setLong("my.vertex.num", num);
    if (isErrorOccurred) {
      conf.setBoolean("my.error.was", true);
    }
    fs = FileSystem.get(conf);
    job = Job.getInstance(conf, "Levelized Nested Dissection Result");

    job.setJarByClass(LevNestDissectJob.class);
    job.setReducerClass(LNDResultReducer.class);

    /*out = new Path(outPath == null ? (FILES_OUT + depth) : (outPath + "/" + "depth_" + depth));
    out_start = out.suffix("/" + outPath_start);*/

    if (wasError) {
      in = out.suffix("/" + outPath_count);
      MultipleInputs.addInputPath(job, in, SequenceFileInputFormat.class, StartVertexMapper.class);
      in_start = out_start;
      MultipleInputs.addInputPath(
          job, in_start, SequenceFileInputFormat.class, LNDResultMapper.class);
    }
    in_vertex = out.suffix("/" + outPath_vertex);
    MultipleInputs.addInputPath(
        job, in_vertex, SequenceFileInputFormat.class, LNDResultMapper.class);

    out =
        new Path(outPath == null ? (FILES_OUT + "result") : (outPath + "/" + "depth_" + "result"));
    if (fs.exists(out)) {
      fs.delete(out, true);
    }
    FileOutputFormat.setOutputPath(job, out);

    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.waitForCompletion(true);
  }
 @Override
 public Path resolve(Path path) {
   CodecInfo c = getCodecInfo();
   String suffix = c != null ? "." + c.getDefaultSuffix() : "";
   if (path != null) {
     return path.suffix(suffix);
   } else if (StringUtils.hasText(suffix)) {
     return new Path(suffix);
   } else {
     return path;
   }
 }
  @Override
  public RecordWriter<IntWritable, Double2DArrayWritable> getRecordWriter(
      TaskAttemptContext context) throws IOException, InterruptedException {

    // setup variables for image generation
    FileSystem fs = FileSystem.get(context.getConfiguration());
    Path picTempPath = FileOutputFormat.getOutputPath(context);
    fs.mkdirs(picTempPath);
    int k = context.getConfiguration().getInt("k", -1);

    Path imgPath = picTempPath.suffix("/points.png");

    if (k == -1) throw new RuntimeException("k is -1");

    return new PicRecordWriter(imgPath, k, context.getConfiguration());
  }
  /**
   * Test of {@link FileQueue} can identify compressed file and provide readers to extract
   * uncompressed data only if input-compression is enabled.
   */
  @Test
  public void testFileQueueDecompression() throws IOException {
    JobConf conf = new JobConf();
    FileSystem lfs = FileSystem.getLocal(conf);
    String inputLine = "Hi Hello!";

    CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
    CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true);
    org.apache.hadoop.mapred.FileOutputFormat.setCompressOutput(conf, true);
    org.apache.hadoop.mapred.FileOutputFormat.setOutputCompressorClass(conf, GzipCodec.class);

    // define the test's root temp directory
    Path rootTempDir =
        new Path(System.getProperty("test.build.data", "/tmp"))
            .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());

    Path tempDir = new Path(rootTempDir, "TestFileQueueDecompression");
    lfs.delete(tempDir, true);

    // create a compressed file
    Path compressedFile = new Path(tempDir, "test");
    OutputStream out =
        CompressionEmulationUtil.getPossiblyCompressedOutputStream(compressedFile, conf);
    BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out));
    writer.write(inputLine);
    writer.close();

    compressedFile = compressedFile.suffix(".gz");
    // now read back the data from the compressed stream using FileQueue
    long fileSize = lfs.listStatus(compressedFile)[0].getLen();
    CombineFileSplit split =
        new CombineFileSplit(new Path[] {compressedFile}, new long[] {fileSize});
    FileQueue queue = new FileQueue(split, conf);
    byte[] bytes = new byte[inputLine.getBytes().length];
    queue.read(bytes);
    queue.close();
    String readLine = new String(bytes);
    assertEquals("Compression/Decompression error", inputLine, readLine);
  }
  /**
   * Test {@link CompressionEmulationUtil#getPossiblyDecompressedInputStream(Path, Configuration,
   * long)} and {@link CompressionEmulationUtil#getPossiblyCompressedOutputStream(Path,
   * Configuration)}.
   */
  @Test
  public void testPossiblyCompressedDecompressedStreams() throws IOException {
    JobConf conf = new JobConf();
    FileSystem lfs = FileSystem.getLocal(conf);
    String inputLine = "Hi Hello!";

    CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
    CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true);
    conf.setBoolean(FileOutputFormat.COMPRESS, true);
    conf.setClass(FileOutputFormat.COMPRESS_CODEC, GzipCodec.class, CompressionCodec.class);

    // define the test's root temp directory
    Path rootTempDir =
        new Path(System.getProperty("test.build.data", "/tmp"))
            .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());

    Path tempDir = new Path(rootTempDir, "TestPossiblyCompressedDecompressedStreams");
    lfs.delete(tempDir, true);

    // create a compressed file
    Path compressedFile = new Path(tempDir, "test");
    OutputStream out =
        CompressionEmulationUtil.getPossiblyCompressedOutputStream(compressedFile, conf);
    BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out));
    writer.write(inputLine);
    writer.close();

    // now read back the data from the compressed stream
    compressedFile = compressedFile.suffix(".gz");
    InputStream in =
        CompressionEmulationUtil.getPossiblyDecompressedInputStream(compressedFile, conf, 0);
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    String readLine = reader.readLine();
    assertEquals("Compression/Decompression error", inputLine, readLine);
    reader.close();
  }
Example #11
  private static void DissectionJob()
      throws IOException, InterruptedException, ClassNotFoundException {

    conf = new Configuration();
    fs = FileSystem.get(conf);
    conf.set("my.out.path.vertex", outPath_vertex);
    conf.set("my.out.path.count", outPath_count);
    conf.setLong("my.vertex.num", num);
    job = Job.getInstance(conf, "Levelized Nested Dissection " + depth);

    job.setJarByClass(LevNestDissectJob.class);
    job.setReducerClass(LevNestDissectReducer.class);

    if (wasStart) {
      in_start = out_start;
      MultipleInputs.addInputPath(
          job, in_start, SequenceFileInputFormat.class, LevNestDissectMapper.class);
    }
    in_vertex = out.suffix("/" + outPath_vertex);
    MultipleInputs.addInputPath(
        job, in_vertex, SequenceFileInputFormat.class, LevNestDissectMapper.class);

    out = new Path(outPath == null ? (FILES_OUT + depth) : (outPath + "/" + "depth_" + depth));
    if (fs.exists(out)) {
      fs.delete(out, true);
    }
    FileOutputFormat.setOutputPath(job, out);
    MultipleOutputs.addNamedOutput(
        job, "vertex", SequenceFileOutputFormat.class, LongWritable.class, VertexWritable.class);
    MultipleOutputs.addNamedOutput(
        job, "count", SequenceFileOutputFormat.class, LongWritable.class, LongWritable.class);

    job.setMapOutputValueClass(VertexWritable.class);

    job.waitForCompletion(true);

    updated =
        job.getCounters().findCounter(LevNestDissectReducer.UpdatedCounter.UPDATED).getValue();
    // Workaround (kludge) for an unexplained error
    if (notNumbered > 0 && updated == 0) {
      notNumbered_tmp =
          job.getCounters()
              .findCounter(LevNestDissectReducer.NotNumberedCounter.NOT_NUMBERED)
              .getValue();
      if (notNumbered_tmp > 0) {
        notNumbered = notNumbered_tmp;
        wasError = false;
        nextDepth();
      } else {
        wasError = true;
        isErrorOccurred = true;
        depth--;
        out =
            new Path(
                outPath == null
                    ? (FILES_OUT + (depth - 1))
                    : (outPath + "/" + "depth_" + (depth - 1)));
        /*depth -= 2;
        out = new Path(outPath == null ? (FILES_OUT + depth) : (outPath + "/" + "depth_" + depth));*/
      }
    } else {
      wasError = false;
      notNumbered =
          job.getCounters()
              .findCounter(LevNestDissectReducer.NotNumberedCounter.NOT_NUMBERED)
              .getValue();
      if (notNumbered > 0) {
        nextDepth();
      }
    }
  }
Example #12
  public void splitLog(final List<ServerName> serverNames) throws IOException {
    long splitTime = 0, splitLogSize = 0;
    List<Path> logDirs = new ArrayList<Path>();
    for(ServerName serverName: serverNames){
      Path logDir = new Path(this.rootdir,
        HLog.getHLogDirectoryName(serverName.toString()));
      Path splitDir = logDir.suffix(HLog.SPLITTING_EXT);
      // rename the directory so a rogue RS doesn't create more HLogs
      if (fs.exists(logDir)) {
        if (!this.fs.rename(logDir, splitDir)) {
          throw new IOException("Failed fs.rename for log split: " + logDir);
        }
        logDir = splitDir;
        LOG.debug("Renamed region directory: " + splitDir);
      } else if (!fs.exists(splitDir)) {
        LOG.info("Log dir for server " + serverName + " does not exist");
        continue;
      }
      logDirs.add(splitDir);
    }

    if (logDirs.isEmpty()) {
      LOG.info("No logs to split");
      return;
    }

    if (distributedLogSplitting) {
      splitLogManager.handleDeadWorkers(serverNames);
      splitTime = EnvironmentEdgeManager.currentTimeMillis();
      splitLogSize = splitLogManager.splitLogDistributed(logDirs);
      splitTime = EnvironmentEdgeManager.currentTimeMillis() - splitTime;
    } else {
      for(Path logDir: logDirs){
        // splitLogLock ensures that dead region servers' logs are processed
        // one at a time
        this.splitLogLock.lock();
        try {
          HLogSplitter splitter = HLogSplitter.createLogSplitter(
            conf, rootdir, logDir, oldLogDir, this.fs);
          try {
            // If FS is in safe mode, just wait till out of it.
            FSUtils.waitOnSafeMode(conf, conf.getInt(HConstants.THREAD_WAKE_FREQUENCY, 1000));
            splitter.splitLog();
          } catch (OrphanHLogAfterSplitException e) {
            LOG.warn("Retrying splitting because of:", e);
            //An HLogSplitter instance can only be used once.  Get new instance.
            splitter = HLogSplitter.createLogSplitter(conf, rootdir, logDir,
              oldLogDir, this.fs);
            splitter.splitLog();
          }
          splitTime = splitter.getTime();
          splitLogSize = splitter.getSize();
        } finally {
          this.splitLogLock.unlock();
        }
      }
    }

    if (this.metrics != null) {
      this.metrics.addSplit(splitTime, splitLogSize);
    }
  }
Example #13
  public int run(String[] args) throws Exception {
    // printUsage();
    /*
     * SETUP
     */
    Configuration argConf = getConf();
    Hashtable<String, String> confArg = new Hashtable<String, String>();
    setup(confArg, argConf);
    Date currentTime = new Date();
    Date endDate = new Date(new Long(confArg.get("timestamp_stop")));
    Boolean full_run = confArg.get("intermediate").matches("(?i).*true.*");
    Boolean quick_add = confArg.get("quick_add").matches("(?i).*true.*");
    logger.info("Running GeStore");

    // ZooKeeper setup
    Configuration config = HBaseConfiguration.create();
    zkWatcher = new ZooKeeperWatcher(config, "Testing", new HBaseAdmin(config));
    zkInstance =
        new ZooKeeper(
            ZKConfig.getZKQuorumServersString(config),
            config.getInt("zookeeper.session.timeout", -1),
            zkWatcher);

    if (!confArg.get("task_id").isEmpty()) {
      confArg.put("temp_path", confArg.get("temp_path") + confArg.get("task_id"));
    }

    String lockRequest = confArg.get("file_id");
    if (!confArg.get("run_id").isEmpty())
      lockRequest = lockRequest + "_" + confArg.get("run_id") + "_";
    if (!confArg.get("task_id").isEmpty())
      lockRequest = lockRequest + "_" + confArg.get("task_id") + "_";

    // Get type of movement
    toFrom type_move = checkArgs(confArg);
    if (type_move == toFrom.LOCAL2REMOTE && !confArg.get("format").equals("unknown")) {
      List<String> arguments = new ArrayList<String>();
      arguments.add("-Dinput=" + confArg.get("local_path"));
      arguments.add("-Dtable=" + confArg.get("file_id"));
      arguments.add("-Dtimestamp=" + confArg.get("timestamp_stop"));
      arguments.add("-Dtype=" + confArg.get("format"));
      arguments.add("-Dtarget_dir=" + confArg.get("base_path") + "_" + confArg.get("file_id"));
      arguments.add("-Dtemp_hdfs_path=" + confArg.get("temp_path"));
      arguments.add("-Drun_id=" + confArg.get("run_id"));
      if (!confArg.get("run_id").isEmpty()) arguments.add("-Drun_id=" + confArg.get("run_id"));
      if (!confArg.get("task_id").isEmpty()) arguments.add("-Dtask_id=" + confArg.get("task_id"));
      if (quick_add) arguments.add("-Dquick_add=" + confArg.get("quick_add"));
      String lockName = lock(lockRequest);
      String[] argumentString = arguments.toArray(new String[arguments.size()]);
      adddb.main(argumentString);
      unlock(lockName);
      System.exit(0);
    }

    // Database registration

    dbutil db_util = new dbutil(config);
    db_util.register_database(confArg.get("db_name_files"), true);
    db_util.register_database(confArg.get("db_name_runs"), true);
    db_util.register_database(confArg.get("db_name_updates"), true);
    FileSystem hdfs = FileSystem.get(config);
    FileSystem localFS = FileSystem.getLocal(config);

    // Get source type
    confArg.put("source", getSource(db_util, confArg.get("db_name_files"), confArg.get("file_id")));
    confArg.put(
        "database", isDatabase(db_util, confArg.get("db_name_files"), confArg.get("file_id")));
    if (!confArg.get("source").equals("local")
        && type_move == toFrom.REMOTE2LOCAL
        && !confArg.get("timestamp_stop").equals(Integer.toString(Integer.MAX_VALUE))) {
      confArg.put("timestamp_stop", Long.toString(latestVersion(confArg, db_util)));
    }

    /*
     * Get previous timestamp
     */
    Get run_id_get = new Get(confArg.get("run_id").getBytes());
    Result run_get = db_util.doGet(confArg.get("db_name_runs"), run_id_get);
    KeyValue run_file_prev =
        run_get.getColumnLatest(
            "d".getBytes(), (confArg.get("file_id") + "_db_timestamp").getBytes());
    String last_timestamp = new String("0");
    if (null != run_file_prev && !confArg.get("source").equals("local")) {
      long last_timestamp_real = run_file_prev.getTimestamp();
      Long current_timestamp = new Long(confArg.get("timestamp_real"));
      if ((current_timestamp - last_timestamp_real) > 36000) {
        last_timestamp = new String(run_file_prev.getValue());
        Integer lastTimestamp = new Integer(last_timestamp);
        lastTimestamp += 1;
        last_timestamp = lastTimestamp.toString();
        logger.info("Last timestamp: " + last_timestamp + " End data: " + endDate);
        Date last_run = new Date(run_file_prev.getTimestamp());
        if (last_run.before(endDate) && !full_run) {
          confArg.put("timestamp_start", last_timestamp);
        }
      }
    }

    Integer tse = new Integer(confArg.get("timestamp_stop"));
    Integer tss = new Integer(confArg.get("timestamp_start"));
    if (tss > tse) {
      logger.info("No new version of requested file.");
      return 0;
    }

    /*
     * Generate file
     */

    String lockName = lock(lockRequest);

    Get file_id_get = new Get(confArg.get("file_id").getBytes());
    Result file_get = db_util.doGet(confArg.get("db_name_files"), file_id_get);
    if (!file_get.isEmpty()) {
      boolean found =
          hasFile(
              db_util,
              hdfs,
              confArg.get("db_name_files"),
              confArg.get("file_id"),
              getFullPath(confArg));
      if (confArg.get("source").equals("fullfile")) {
        found = false;
      }
      String filenames_put =
          getFileNames(
              db_util, confArg.get("db_name_files"), confArg.get("file_id"), getFullPath(confArg));
      // Filename not found in file database
      if (!found && type_move == toFrom.REMOTE2LOCAL) {
        if (!confArg.get("source").equals("local")) {
          // Generate intermediate file
          if (getFile(hdfs, confArg, db_util) == null) {
            unlock(lockName);
            return 1;
          }
          // Put generated file into file database
          if (!confArg.get("format").equals("fullfile")) {
            putFileEntry(
                db_util,
                hdfs,
                confArg.get("db_name_files"),
                confArg.get("file_id"),
                confArg.get("full_file_name"),
                confArg.get("source"));
          }
        } else {
          logger.warn("Remote file not found, and cannot be generated! File: " + confArg);
          unlock(lockName);
          return 1;
        }
      }
    } else {
      if (type_move == toFrom.REMOTE2LOCAL) {
        logger.warn("Remote file not found, and cannot be generated.");
        unlock(lockName);
        return 1;
      }
    }

    /*
     * Copy file
     * Update tables
     */

    if (type_move == toFrom.LOCAL2REMOTE) {
      if (!confArg.get("format").equals("fullfile")) {
        putFileEntry(
            db_util,
            hdfs,
            confArg.get("db_name_files"),
            confArg.get("file_id"),
            getFullPath(confArg),
            confArg.get("source"));
      }
      putRunEntry(
          db_util,
          confArg.get("db_name_runs"),
          confArg.get("run_id"),
          confArg.get("file_id"),
          confArg.get("type"),
          confArg.get("timestamp_real"),
          confArg.get("timestamp_stop"),
          getFullPath(confArg),
          confArg.get("delimiter"));
      hdfs.copyFromLocalFile(new Path(confArg.get("local_path")), new Path(getFullPath(confArg)));
    } else if (type_move == toFrom.REMOTE2LOCAL) {
      FileStatus[] files = hdfs.globStatus(new Path(getFullPath(confArg) + "*"));
      putRunEntry(
          db_util,
          confArg.get("db_name_runs"),
          confArg.get("run_id"),
          confArg.get("file_id"),
          confArg.get("type"),
          confArg.get("timestamp_real"),
          confArg.get("timestamp_stop"),
          getFullPath(confArg),
          confArg.get("delimiter"));
      unlock(lockName);
      for (FileStatus file : files) {
        Path cur_file = file.getPath();
        Path cur_local_path =
            new Path(new String(confArg.get("local_path") + confArg.get("file_id")));
        String suffix = getSuffix(getFileName(confArg), cur_file.getName());
        if (suffix.length() > 0) {
          cur_local_path = cur_local_path.suffix(new String("." + suffix));
        }
        if (confArg.get("copy").equals("true")) {
          String crc = hdfs.getFileChecksum(cur_file).toString();
          if (checksumLocalTest(cur_local_path, crc)) {
            continue;
          } else {
            hdfs.copyToLocalFile(cur_file, cur_local_path);
            writeChecksum(cur_local_path, crc);
          }
        } else {
          System.out.println(cur_local_path + "\t" + cur_file);
        }
      }
    }
    unlock(lockName);
    return 0;
  }
Example #14
  // Information needed to get a single file:
  // BASE_PATH, FILE_ID, TIMESTAMP_START, TIMESTAMP_STOP, SOURCE, FILESYSTEM
  private static Vector<Path> getFile(
      FileSystem fs, Hashtable<String, String> config, dbutil db_util) throws Exception {
    Long latestVersion = latestVersion(config, db_util);

    try {
      config.put("timestamp_start", config.get("timestamp_start"));
      config.put("timestamp_real", latestVersion.toString());
      config.put("timestamp_stop", latestVersion.toString());
    } catch (Exception E) {
      logger.error("Tryign to get file that is impossible to generate: " + getFullPath(config));
      return null;
    }
    if (Integer.parseInt(config.get("timestamp_start"))
        > Integer.parseInt(config.get("timestamp_stop"))) {
      return null;
    }
    logger.debug(
        "Getting DB for timestamp "
            + config.get("timestamp_start")
            + " to "
            + config.get("timestamp_stop"));

    String final_result = getFullPath(config);

    String temp_path_base =
        config.get("local_temp_path")
            + "_"
            + config.get("task_id")
            + "_"
            + config.get("run_id")
            + "/";
    Path newPath = new Path(final_result + "*");
    Vector<Path> ret_path = new Vector<Path>();
    String lockName = lock(final_result.replaceAll("/", "_"));
    if (fs.globStatus(newPath).length != 0) {
      ret_path.add(newPath);
      unlock(lockName);
      config.put("full_file_name", final_result);
      return ret_path;
    } else {
      if (!config.get("source").equals("local")) {
        config.put("temp_path_base", temp_path_base);

        config.put("timestamp_start", config.get("timestamp_start"));
        config.put("timestamp_real", latestVersion.toString());
        config.put("timestamp_stop", latestVersion.toString());

        Class<?> sourceClass =
            Class.forName("org.gestore.plugin.source." + config.get("source") + "Source");
        Method process_data = sourceClass.getMethod("process", Hashtable.class, FileSystem.class);
        Object processor = sourceClass.newInstance();
        Object retVal;
        try {
          retVal = process_data.invoke(processor, config, fs);
        } catch (InvocationTargetException E) {
          Throwable exception = E.getTargetException();
          logger.error("Unable to call method in child class: " + exception.toString());
          exception.printStackTrace(System.out);
          unlock(lockName);
          return null;
        }
        FileStatus[] files = (FileStatus[]) retVal;
        if (files == null) {
          logger.error("Error getting files, no files returned");
          return null;
        }

        for (FileStatus file : files) {
          Path cur_file = file.getPath();
          Path cur_local_path = new Path(temp_path_base + config.get("file_id"));
          String suffix = getSuffix(config.get("file_id"), cur_file.getName());
          cur_local_path = cur_local_path.suffix(suffix);
          Path res_path = new Path(new String(final_result + suffix));
          logger.debug("Moving file" + cur_file.toString() + " to " + res_path.toString());
          if (config.get("copy").equals("true")) {
            fs.moveFromLocalFile(cur_file, res_path);
          } else {
            fs.rename(cur_file, res_path);
          }
        }

        config.put("full_file_name", final_result);
      }
    }
    unlock(lockName);
    return ret_path;
  }
  /** The main entry point if this class is called as a {@link Tool}. */
  @Override
  public int run(String[] args) throws Exception {
    Path inputPath = null;
    Path outputPath = null;

    Configuration conf = getConf();

    // retrieve our paths from the configuration
    inputPath = new Path(conf.get(Util.CONF_LOGDATA_PATH));
    outputPath = new Path(conf.get(Util.CONF_CACHING_SIMULATOR_PATH));

    final int numCores = conf.getInt(Util.CONF_NUM_CORES, Util.DEFAULT_NUM_CORES);
    final int numNodes = conf.getInt(Util.CONF_NUM_NODES, Util.DEFAULT_NUM_NODES);

    NUM_OF_REDUCE_TASKS = numCores * numNodes;

    // set the jobname
    String jobName =
        Util.JOB_NAME
            + " ["
            + CachingTool.ACTION
            + "] {logdata="
            + inputPath.getName()
            + ", session="
            + conf.get(Util.CONF_SESSION_DURATION)
            + "}";

    Util.showStatus("Running " + jobName);

    conf.set("hadoop.job.ugi", Util.HADOOP_USER);
    conf.set("mapred.child.java.opts", "-Xmx1500M -XX:+UseConcMarkSweepGC -XX:+CMSIncrementalMode");
    conf.set("mapred.task.timeout", "1800000");
    conf.set("mapred.map.tasks.speculative.execution", "false");
    conf.set("mapred.reduce.tasks.speculative.execution", "false");

    FileSystem fs = FileSystem.get(conf);

    Job job = new Job(conf, jobName);

    // set number of reduce tasks
    job.setNumReduceTasks(NUM_OF_REDUCE_TASKS);

    // set mapper, reducer, partitioner and grouping comperator
    job.setJarByClass(CachingTool.class);
    job.setMapperClass(CachingMapper.class);
    job.setReducerClass(CachingReducer.class);
    // GroupingComperator used for Secondary-Sort
    job.setGroupingComparatorClass(TextPair.FirstComparator.class);
    job.setPartitionerClass(TextPair.FirstPartitioner.class);
    job.setOutputKeyClass(TextPair.class);
    job.setOutputValueClass(Text.class);

    // set input and output format
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setMaxInputSplitSize(job, Util.DATASET_MB_SPLIT * 25);
    FileInputFormat.setMinInputSplitSize(job, Util.DATASET_MB_SPLIT * 25);

    // add input path subdirectories if there are any
    ArrayList<Path> inputPaths = Util.getInputDirectories(fs, inputPath);
    int pathsAdded = 0;
    if (inputPaths.size() > 0) {
      for (Path p : inputPaths) {
        if (!p.getName().contains(".") && !p.getName().contains("_")) {
          Util.showStatus("Adding input paths " + p);
          FileInputFormat.addInputPath(job, p);
          pathsAdded++;
        }
      }
    }

    if (pathsAdded == 0) {
      Util.showStatus("Adding input path " + inputPath);
      FileInputFormat.addInputPath(job, inputPath);
    }

    // clear output dir
    fs.delete(outputPath.suffix("/" + conf.get(Util.CONF_SESSION_DURATION)), true);

    FileOutputFormat.setOutputPath(
        job, outputPath.suffix("/" + conf.get(Util.CONF_SESSION_DURATION)));

    // run the job and wait for it to be completed
    boolean b = job.waitForCompletion(true);

    // NOTE! The counters will be written HERE
    // retrieve the counters
    Counter numNewInCache = job.getCounters().findCounter(CachingReducer.CacheCounter.NEW_TO_CACHE);
    Counter numRenewCache = job.getCounters().findCounter(CachingReducer.CacheCounter.RENEW_CACHE);
    Counter numUsedFromCache =
        job.getCounters().findCounter(CachingReducer.CacheCounter.USED_FROM_CACHE);

    // write the counters to the metadata file
    Path headerPath = new Path(conf.get(Util.CONF_CACHING_SIMULATOR_PATH));
    FSDataOutputStream out =
        fs.create(headerPath.suffix("/" + DataSetHeader.SIMULATE_CACHING_METADATA_FILE));
    PrintWriter w = new PrintWriter(out);

    // the sum of all counters equals the sum of all queries in the log file
    w.println("hostnametypeAddedToCache=" + numNewInCache.getValue());
    w.println("queriesAddedAgainToCache=" + numRenewCache.getValue());
    w.println("queriesAnsweredFromCache=" + numUsedFromCache.getValue());

    w.close();
    out.close();

    // Delete all empty output files
    Util.deleteEmptyFiles(fs, outputPath.suffix("/" + conf.get(Util.CONF_SESSION_DURATION)));

    return b ? 1 : 0;
  }