Example #1
1
    private File getHfdsFileToTmpFile(String hdfsPath, HdfsConfiguration configuration) {
      try {
        String fname = hdfsPath.substring(hdfsPath.lastIndexOf('/'));

        File outputDest = File.createTempFile(fname, ".hdfs");
        if (outputDest.exists()) {
          outputDest.delete();
        }

        HdfsInfo hdfsInfo = HdfsInfoFactory.newHdfsInfo(hdfsPath);
        FileSystem fileSystem = hdfsInfo.getFileSystem();
        FileUtil.copy(fileSystem, new Path(hdfsPath), outputDest, false, fileSystem.getConf());
        try {
          FileUtil.copyMerge(
              fileSystem, // src
              new Path(hdfsPath),
              FileSystem.getLocal(new Configuration()), // dest
              new Path(outputDest.toURI()),
              false,
              fileSystem.getConf(),
              null);
        } catch (IOException e) {
          return outputDest;
        }

        return new File(outputDest, fname);
      } catch (IOException ex) {
        throw new RuntimeCamelException(ex);
      }
    }
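A hedged call-site sketch for the helper above (the HDFS URI, the HdfsConfiguration instance, and the cleanup policy are assumptions, not taken from the snippet):

    // Hypothetical caller; the URI and the 'configuration' instance are assumptions.
    File local = getHfdsFileToTmpFile("hdfs://namenode:8020/user/camel/out/part-00000", configuration);
    try {
      // consume the local copy ...
    } finally {
      FileUtil.fullyDelete(local); // assumed: cleaning up the temp copy is the caller's job
    }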
  /**
   * Generate random data, compress it, optionally index it, and MD5-hash it. Then read it all
   * back, hash it again, and verify that the two digests match.
   *
   * @param testWithIndex Should we index or not?
   * @param charsToOutput How many characters of random data should we output?
   * @throws IOException
   * @throws NoSuchAlgorithmException
   * @throws InterruptedException
   */
  private void runTest(boolean testWithIndex, int charsToOutput)
      throws IOException, NoSuchAlgorithmException, InterruptedException {

    Configuration conf = new Configuration();
    conf.setLong("fs.local.block.size", charsToOutput / 2);
    // reducing block size to force a split of the tiny file
    conf.set("io.compression.codecs", LzopCodec.class.getName());

    Assume.assumeTrue(CoreTestUtil.okToRunLzoTests(conf));

    FileSystem.getLocal(conf).close(); // remove cached filesystem (if any)
    FileSystem localFs = FileSystem.getLocal(conf);
    localFs.delete(outputDir_, true);
    localFs.mkdirs(outputDir_);

    Job job = new Job(conf);
    TextOutputFormat.setCompressOutput(job, true);
    TextOutputFormat.setOutputCompressorClass(job, LzopCodec.class);
    TextOutputFormat.setOutputPath(job, outputDir_);

    TaskAttemptContext attemptContext =
        new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID("123", 0, false, 1, 2));

    // create some input data
    byte[] expectedMd5 = createTestInput(outputDir_, localFs, attemptContext, charsToOutput);

    if (testWithIndex) {
      Path lzoFile = new Path(outputDir_, lzoFileName_);
      LzoIndex.createIndex(localFs, lzoFile);
    }

    LzoTextInputFormat inputFormat = new LzoTextInputFormat();
    TextInputFormat.setInputPaths(job, outputDir_);

    List<InputSplit> is = inputFormat.getSplits(job);
    // verify we have the right number of lzo chunks
    if (testWithIndex && OUTPUT_BIG == charsToOutput) {
      assertEquals(3, is.size());
    } else {
      assertEquals(1, is.size());
    }

    // let's read it all and calculate the md5 hash
    for (InputSplit inputSplit : is) {
      RecordReader<LongWritable, Text> rr =
          inputFormat.createRecordReader(inputSplit, attemptContext);
      rr.initialize(inputSplit, attemptContext);

      while (rr.nextKeyValue()) {
        Text value = rr.getCurrentValue();

        md5_.update(value.getBytes(), 0, value.getLength());
      }

      rr.close();
    }

    localFs.close();
    assertTrue(Arrays.equals(expectedMd5, md5_.digest()));
  }
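A minimal sketch of JUnit entry points that might drive runTest (the test-method names are assumptions; OUTPUT_BIG is the constant already referenced inside runTest):

  // Hypothetical JUnit 4 entry points; names are assumptions.
  @Test
  public void testSplittingWithIndex() throws Exception {
    runTest(true, OUTPUT_BIG); // indexed big output: runTest asserts 3 splits
  }

  @Test
  public void testSplittingWithoutIndex() throws Exception {
    runTest(false, OUTPUT_BIG); // no index: runTest asserts a single split
  }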
  /** Test deleteOnExit */
  public void testDeleteOnExit() throws IOException {
    Configuration conf = new Configuration();
    if (simulatedStorage) {
      conf.setBoolean(SimulatedFSDataset.CONFIG_PROPERTY_SIMULATED, true);
    }
    MiniDFSCluster cluster = new MiniDFSCluster(conf, 1, true, null);
    FileSystem fs = cluster.getFileSystem();
    FileSystem localfs = FileSystem.getLocal(conf);

    try {

      // Creates files in HDFS and local file system.
      //
      Path file1 = new Path("filestatus.dat");
      Path file2 = new Path("filestatus2.dat");
      Path file3 = new Path("filestatus3.dat");
      FSDataOutputStream stm1 = createFile(fs, file1, 1);
      FSDataOutputStream stm2 = createFile(fs, file2, 1);
      FSDataOutputStream stm3 = createFile(localfs, file3, 1);
      System.out.println("DeleteOnExit: Created files.");

      // write to files and close. Purposely, do not write to file2.
      writeFile(stm1);
      writeFile(stm3);
      stm1.close();
      stm2.close();
      stm3.close();

      // set delete on exit flag on files.
      fs.deleteOnExit(file1);
      fs.deleteOnExit(file2);
      localfs.deleteOnExit(file3);

      // close the file system. This should make the above files
      // disappear.
      fs.close();
      localfs.close();
      fs = null;
      localfs = null;

      // reopen file system and verify that file does not exist.
      fs = cluster.getFileSystem();
      localfs = FileSystem.getLocal(conf);

      assertTrue(file1 + " still exists in spite of deleteOnExit being set.", !fs.exists(file1));
      assertTrue(file2 + " still exists in spite of deleteOnExit being set.", !fs.exists(file2));
      assertTrue(file3 + " still exists in spite of deleteOnExit being set.", !localfs.exists(file3));
      System.out.println("DeleteOnExit successful.");

    } finally {
      IOUtils.closeStream(fs);
      IOUtils.closeStream(localfs);
      cluster.shutdown();
    }
  }
Example #4
0
  public void testInputFormat() {

    try {
      JobConf conf = new JobConf();
      String TMP_DIR = System.getProperty("test.build.data", "/tmp");
      Path filename = new Path("file:///" + TMP_DIR + "/tmpSeqFile");
      SequenceFile.Writer sfw =
          SequenceFile.createWriter(
              FileSystem.getLocal(conf),
              conf,
              filename,
              ChukwaArchiveKey.class,
              ChunkImpl.class,
              SequenceFile.CompressionType.NONE,
              Reporter.NULL);

      StringBuilder buf = new StringBuilder();
      int[] offsets = new int[lines.length];
      for (int i = 0; i < lines.length; ++i) {
        buf.append(lines[i]);
        buf.append("\n");
        offsets[i] = buf.length() - 1;
      }
      ChukwaArchiveKey key = new ChukwaArchiveKey(0, "datatype", "sname", 0);
      ChunkImpl val = new ChunkImpl("datatype", "sname", 0, buf.toString().getBytes(), null);
      val.setRecordOffsets(offsets);
      sfw.append(key, val);
      sfw.append(key, val); // write it twice
      sfw.close();

      long len = FileSystem.getLocal(conf).getFileStatus(filename).getLen();
      InputSplit split = new FileSplit(filename, 0, len, (String[]) null);
      ChukwaInputFormat in = new ChukwaInputFormat();
      RecordReader<LongWritable, Text> r = in.getRecordReader(split, conf, Reporter.NULL);

      LongWritable l = r.createKey();
      Text line = r.createValue();
      for (int i = 0; i < lines.length * 2; ++i) {
        boolean succeeded = r.next(l, line);
        assertTrue(succeeded);
        assertEquals(i, l.get());
        assertEquals(lines[i % lines.length], line.toString());
        System.out.println("read line: " + l.get() + " " + line);
      }
      boolean succeeded = r.next(l, line);
      assertFalse(succeeded);

    } catch (IOException e) {
      e.printStackTrace();
      fail("IO exception " + e);
    }
  }
  private void verifyJobOutput() throws IOException {

    final String _SUCCESS = "_SUCCESS";
    final String REDUCER_OUTPUT = "part-r-";
    boolean wasSuccessful = false;
    boolean reducerOutputExists = false;
    FileSystem fs = FileSystem.getLocal(new Configuration());
    RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(new Path(OUTPUT_PATH), false);
    LocatedFileStatus fileStatus = null;
    String fileName = null;

    while (iterator.hasNext()) {
      fileStatus = iterator.next();
      fileName = fileStatus.getPath().getName();

      if (fileName.contains(_SUCCESS)) {
        wasSuccessful = true;
      }
      if (fileName.contains(REDUCER_OUTPUT)) {
        reducerOutputExists = true;
      }
    }

    // verify presence of _SUCCESS file
    Assert.assertTrue(wasSuccessful);

    // verify presence of Reducer output
    Assert.assertTrue(reducerOutputExists);
  }
Example #6
0
  @Test
  public final void testCreateAndPurgeExternalTableByExecuteQuery()
      throws IOException, ServiceException {
    TajoConf conf = cluster.getConfiguration();
    final String tableName = "testCreateAndPurgeExternalTableByExecuteQuery";

    Path tablePath = writeTmpTable(tableName);
    assertFalse(client.existTable(tableName));

    String sql =
        "create external table "
            + tableName
            + " (deptname text, score int4) "
            + "using csv location '"
            + tablePath
            + "'";

    client.executeQueryAndGetResult(sql);
    assertTrue(client.existTable(tableName));

    client.updateQuery("drop table " + tableName + " purge");
    assertFalse(client.existTable(tableName));
    FileSystem localFS = FileSystem.getLocal(conf);
    assertFalse(localFS.exists(tablePath));
  }
Example #7
0
  /** Test that we can generate a file containing the DDL and not import. */
  @Test
  public void testGenerateOnly() throws IOException {
    final String TABLE_NAME = "GenerateOnly";
    setCurTableName(TABLE_NAME);
    setNumCols(1);

    // Figure out where our target generated .q file is going to be.
    SqoopOptions options = getSqoopOptions(getArgv(false, null), new ImportTool());
    Path ddlFile = new Path(new Path(options.getCodeOutputDir()), TABLE_NAME + ".q");
    FileSystem fs = FileSystem.getLocal(new Configuration());

    // If it's already there, remove it before running the test to ensure
    // that it's the current test that generated the file.
    if (fs.exists(ddlFile)) {
      if (!fs.delete(ddlFile, false)) {
        LOG.warn("Could not delete previous ddl file: " + ddlFile);
      }
    }

    // Run a basic import, but specify that we're just generating definitions.
    String[] types = {"INTEGER"};
    String[] vals = {"42"};
    runImportTest(TABLE_NAME, types, vals, null, getCodeGenArgs(), new CodeGenTool());

    // Test that the generated definition file exists.
    assertTrue("Couldn't find expected ddl file", fs.exists(ddlFile));

    Path hiveImportPath = new Path(new Path(options.getWarehouseDir()), TABLE_NAME);
    assertFalse("Import actually happened!", fs.exists(hiveImportPath));
  }
Example #8
0
 /** Obtain the owner of the log dir. This is determined by checking the job's log directory. */
 static String obtainLogDirOwner(TaskAttemptID taskid) throws IOException {
   Configuration conf = new Configuration();
   FileSystem raw = FileSystem.getLocal(conf).getRaw();
   Path jobLogDir = new Path(getJobDir(taskid.getJobID()).getAbsolutePath());
   FileStatus jobStat = raw.getFileStatus(jobLogDir);
   return jobStat.getOwner();
 }
  /*
   * public void printPrefixSetContent(){ prefixSet.printCompressedKeys();
   * prefixSet.printKeys(); }
   */
  public static void main(String[] args) throws Exception {
    // String indexPath = "/umd-lin/telsayed/indexes/medline04";
    String indexPath = "c:/Research/ivory-workspace";

    Configuration conf = new Configuration();
    FileSystem fileSys = FileSystem.getLocal(conf);
    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fileSys);

    Path termsFilePath = new Path(env.getIndexTermsData());

    Path dfByTermFilePath = new Path(env.getDfByTermData());
    Path cfByTermFilePath = new Path(env.getCfByTermData());

    Path idToTermFilePath = new Path(env.getIndexTermIdMappingData());

    System.out.println("PrefixEncodedGlobalStats");

    PrefixEncodedGlobalStatsWithIndex globalStatsMap =
        new PrefixEncodedGlobalStatsWithIndex(termsFilePath);
    System.out.println("PrefixEncodedGlobalStats1");
    globalStatsMap.loadDFStats(dfByTermFilePath, idToTermFilePath, 0.2f, true);
    System.out.println("PrefixEncodedGlobalStats2");
    globalStatsMap.loadCFStats(cfByTermFilePath, idToTermFilePath, 0.2f, false);
    System.out.println("PrefixEncodedGlobalStats3");
    // String[] firstKeys = termIDMap.getDictionary().getFirstKeys(100);
    int nTerms = globalStatsMap.length();
    System.out.println("nTerms: " + nTerms);
    /*for(int i = 0; i < nTerms; i++){

    	PairOfIntLong p = globalStatsMap.getStats(i);
    	System.out.println(i+"\t"+p.getLeftElement() +"\t"+ p.getRightElement());
    	//if(i%10000 == 0) System.out.println(i+" terms so far ("+p+").");
    }*/
    String term;
    term = "0046";
    System.out.println(term + "\t" + globalStatsMap.getDF(term));
    term = "00565";
    System.out.println(term + "\t" + globalStatsMap.getDF(term));
    term = "01338";
    System.out.println(term + "\t" + globalStatsMap.getDF(term));
    term = "01hz";
    System.out.println(term + "\t" + globalStatsMap.getDF(term));
    term = "03x";
    System.out.println(term + "\t" + globalStatsMap.getDF(term));
    term = "0278x";
    System.out.println(term + "\t" + globalStatsMap.getDF(term));

    term = "0081";
    System.out.println(term + "\t" + globalStatsMap.getDF(term));
    term = "0183";
    System.out.println(term + "\t" + globalStatsMap.getDF(term));
    term = "0244";
    System.out.println(term + "\t" + globalStatsMap.getDF(term));
    term = "032";
    System.out.println(term + "\t" + globalStatsMap.getDF(term));
    // for(int i = 1; i<=200; i++){
    //	term = termIDMap.getTerm(i);
    //	System.out.println(i+"\t"+term+"\t"+termIDMap.getID(term));
    // }
  }
Example #10
0
 static {
   try {
     localFs = FileSystem.getLocal(conf);
   } catch (IOException io) {
     throw new RuntimeException("problem getting local fs", io);
   }
 }
  @Test
  public void testRead_SpecificReader() throws IOException {
    GenericRecord savedRecord = new GenericData.Record(schema);
    savedRecord.put("name", "John Doe");
    savedRecord.put("age", 42);
    savedRecord.put("siblingnames", Lists.newArrayList("Jimmy", "Jane"));
    populateGenericFile(Lists.newArrayList(savedRecord));

    AvroFileReaderFactory<Person> genericReader =
        new AvroFileReaderFactory<Person>(Avros.records(Person.class), new Configuration());
    Iterator<Person> recordIterator =
        genericReader.read(
            FileSystem.getLocal(new Configuration()), new Path(this.avroFile.getAbsolutePath()));

    Person expectedPerson = new Person();
    expectedPerson.setAge(42);
    expectedPerson.setName("John Doe");
    List<CharSequence> siblingNames = Lists.newArrayList();
    siblingNames.add("Jimmy");
    siblingNames.add("Jane");
    expectedPerson.setSiblingnames(siblingNames);

    Person person = recordIterator.next();

    assertEquals(expectedPerson, person);
    assertFalse(recordIterator.hasNext());
  }
Example #12
0
  @Test
  public void runRegression() throws Exception {
    String[] params =
        new String[] {
          "data/gov2/run.gov2.basic.xml",
          "data/gov2/gov2.title.701-775",
          "data/gov2/gov2.title.776-850"
        };

    FileSystem fs = FileSystem.getLocal(new Configuration());

    BatchQueryRunner qr = new BatchQueryRunner(params, fs);

    long start = System.currentTimeMillis();
    qr.runQueries();
    long end = System.currentTimeMillis();

    LOG.info("Total query time: " + (end - start) + "ms");

    verifyAllResults(
        qr.getModels(),
        qr.getAllResults(),
        qr.getDocnoMapping(),
        new Qrels("data/gov2/qrels.gov2.all"));
  }
Example #13
0
 public void testCloseForErroneousRCFile() throws IOException {
   Configuration conf = new Configuration();
   LocalFileSystem fs = FileSystem.getLocal(conf);
   // create an empty file (which is not a valid rcfile)
   Path path = new Path(System.getProperty("test.build.data", ".") + "/broken.rcfile");
   fs.create(path).close();
   // try to create RCFile.Reader
   final TestFSDataInputStream[] openedFile = new TestFSDataInputStream[1];
   try {
     new RCFile.Reader(fs, path, conf) {
        // this method is called by the RCFile.Reader constructor; it is overridden here
        // so we can access the opened file
       protected FSDataInputStream openFile(FileSystem fs, Path file, int bufferSize, long length)
           throws IOException {
         final InputStream in = super.openFile(fs, file, bufferSize, length);
         openedFile[0] = new TestFSDataInputStream(in);
         return openedFile[0];
       }
     };
     fail("IOException expected.");
   } catch (IOException expected) {
   }
   assertNotNull(path + " should have been opened.", openedFile[0]);
   assertTrue("InputStream for " + path + " should have been closed.", openedFile[0].isClosed());
 }
Example #14
0
  public SpillRecord(Path indexFileName, JobConf job, Checksum crc, String expectedIndexOwner)
      throws IOException {

    final FileSystem rfs = FileSystem.getLocal(job).getRaw();
    final DataInputStream in =
        new DataInputStream(
            SecureIOUtils.openForRead(
                new File(indexFileName.toUri().getPath()), expectedIndexOwner, null));
    try {
      final long length = rfs.getFileStatus(indexFileName).getLen();
      final int partitions = (int) length / MAP_OUTPUT_INDEX_RECORD_LENGTH;
      final int size = partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH;

      buf = ByteBuffer.allocate(size);
      if (crc != null) {
        crc.reset();
        CheckedInputStream chk = new CheckedInputStream(in, crc);
        IOUtils.readFully(chk, buf.array(), 0, size);
        if (chk.getChecksum().getValue() != in.readLong()) {
          throw new ChecksumException("Checksum error reading spill index: " + indexFileName, -1);
        }
      } else {
        IOUtils.readFully(in, buf.array(), 0, size);
      }
      entries = buf.asLongBuffer();
    } finally {
      in.close();
    }
  }
 private static <T extends WritableComparable> Path writePartitionFile(
     String testname, JobConf conf, T[] splits) throws IOException {
   final FileSystem fs = FileSystem.getLocal(conf);
   final Path testdir = new Path(System.getProperty("test.build.data", "/tmp")).makeQualified(fs);
   Path p = new Path(testdir, testname + "/_partition.lst");
   TotalOrderPartitioner.setPartitionFile(conf, p);
   conf.setNumReduceTasks(splits.length + 1);
   SequenceFile.Writer w = null;
   try {
     NullWritable nw = NullWritable.get();
     w =
         SequenceFile.createWriter(
             fs,
             conf,
             p,
             splits[0].getClass(),
             NullWritable.class,
             SequenceFile.CompressionType.NONE);
     for (int i = 0; i < splits.length; ++i) {
       w.append(splits[i], NullWritable.get());
     }
   } finally {
     if (null != w) w.close();
   }
   return p;
 }
Example #16
0
 /**
  * Simulate the <code>dfs.name.dir</code> or <code>dfs.data.dir</code> of a populated DFS
  * filesystem.
  *
  * <p>This method creates and populates the directory specified by <code>parent/dirName</code>,
  * for each parent directory. The contents of the new directories will be appropriate for the
  * given node type. If the directory does not exist, it will be created. If the directory already
  * exists, it will first be deleted.
  *
  * <p>By default, a singleton master populated storage directory is created for a Namenode
  * (contains edits, fsimage, version, and time files) and a Datanode (contains version and block
  * files). These directories are then copied by this method to create new storage directories of
  * the appropriate type (Namenode or Datanode).
  *
  * @return the array of created directories
  */
 public static File[] createStorageDirs(NodeType nodeType, String[] parents, String dirName)
     throws Exception {
   File[] retVal = new File[parents.length];
   for (int i = 0; i < parents.length; i++) {
     File newDir = new File(parents[i], dirName);
     createEmptyDirs(new String[] {newDir.toString()});
     LocalFileSystem localFS = FileSystem.getLocal(new Configuration());
     switch (nodeType) {
       case NAME_NODE:
         localFS.copyToLocalFile(
             new Path(namenodeStorage.toString(), "current"), new Path(newDir.toString()), false);
         Path newImgDir = new Path(newDir.getParent(), "image");
         if (!localFS.exists(newImgDir))
           localFS.copyToLocalFile(
               new Path(namenodeStorage.toString(), "image"), newImgDir, false);
         break;
       case DATA_NODE:
         localFS.copyToLocalFile(
             new Path(datanodeStorage.toString(), "current"), new Path(newDir.toString()), false);
         Path newStorageFile = new Path(newDir.getParent(), "storage");
         if (!localFS.exists(newStorageFile))
           localFS.copyToLocalFile(
               new Path(datanodeStorage.toString(), "storage"), newStorageFile, false);
         break;
     }
     retVal[i] = newDir;
   }
   return retVal;
 }
Example #17
0
  // Mostly for setting up the symlinks. Note that when we set up the distributed
  // cache, we didn't create the symlinks. This is done on a per-task basis
  // by the currently executing task.
  public static void setupWorkDir(JobConf conf) throws IOException {
    File workDir = new File(".").getAbsoluteFile();
    FileUtil.fullyDelete(workDir);
    if (DistributedCache.getSymlink(conf)) {
      URI[] archives = DistributedCache.getCacheArchives(conf);
      URI[] files = DistributedCache.getCacheFiles(conf);
      Path[] localArchives = DistributedCache.getLocalCacheArchives(conf);
      Path[] localFiles = DistributedCache.getLocalCacheFiles(conf);
      if (archives != null) {
        for (int i = 0; i < archives.length; i++) {
          String link = archives[i].getFragment();
          if (link != null) {
            link = workDir.toString() + Path.SEPARATOR + link;
            File flink = new File(link);
            if (!flink.exists()) {
              FileUtil.symLink(localArchives[i].toString(), link);
            }
          }
        }
      }
      if (files != null) {
        for (int i = 0; i < files.length; i++) {
          String link = files[i].getFragment();
          if (link != null) {
            link = workDir.toString() + Path.SEPARATOR + link;
            File flink = new File(link);
            if (!flink.exists()) {
              FileUtil.symLink(localFiles[i].toString(), link);
            }
          }
        }
      }
    }
    File jobCacheDir = null;
    if (conf.getJar() != null) {
      jobCacheDir = new File(new Path(conf.getJar()).getParent().toString());
    }

    // create symlinks for all the files in the job cache dir in the current
    // working dir for streaming
    try {
      DistributedCache.createAllSymlink(conf, jobCacheDir, workDir);
    } catch (IOException ie) {
      // Do not exit even if symlinks have not been created.
      LOG.warn(StringUtils.stringifyException(ie));
    }
    // add java.io.tmpdir given by mapred.child.tmp
    String tmp = conf.get("mapred.child.tmp", "./tmp");
    Path tmpDir = new Path(tmp);

    // if temp directory path is not absolute
    // prepend it with workDir.
    if (!tmpDir.isAbsolute()) {
      tmpDir = new Path(workDir.toString(), tmp);
      FileSystem localFs = FileSystem.getLocal(conf);
      if (!localFs.mkdirs(tmpDir) && !localFs.getFileStatus(tmpDir).isDir()) {
        throw new IOException("Mkdirs failed to create " + tmpDir.toString());
      }
    }
  }
Example #18
0
  private ArrayList<String> init(File newFile, List<String> uuids, List<Integer> dataVersions)
      throws IllegalArgumentException, IOException {
    String base = newFile.toURI().toString();

    LocalFileSystem fs = FileSystem.getLocal(new Configuration());

    ArrayList<String> accumuloPaths = new ArrayList<String>();

    for (int i = 0; i < uuids.size(); i++) {
      String volume = "v" + i;

      String accumuloPath = base + "/" + volume + "/accumulo";
      accumuloPaths.add(accumuloPath);

      if (uuids.get(i) != null) {
        fs.mkdirs(new Path(accumuloPath + "/" + ServerConstants.INSTANCE_ID_DIR));
        fs.createNewFile(
            new Path(accumuloPath + "/" + ServerConstants.INSTANCE_ID_DIR + "/" + uuids.get(i)));
      }

      if (dataVersions.get(i) != null) {
        fs.mkdirs(new Path(accumuloPath + "/" + ServerConstants.VERSION_DIR));
        fs.createNewFile(
            new Path(accumuloPath + "/" + ServerConstants.VERSION_DIR + "/" + dataVersions.get(i)));
      }
    }

    return accumuloPaths;
  }
  protected void setupCluster(boolean simulated, long minFileSize, String[] racks, String[] hosts)
      throws IOException {
    conf = new Configuration();
    localFileSys = FileSystem.getLocal(conf);
    conf.setLong("dfs.blockreport.intervalMsec", 1000L);
    conf.set("dfs.replication.pending.timeout.sec", "2");
    conf.setLong("dfs.block.size", 1L);
    conf.set(
        "dfs.block.replicator.classname",
        "org.apache.hadoop.hdfs.server.namenode.BlockPlacementPolicyRaid");
    conf.setLong("hdfs.raid.min.filesize", minFileSize);
    Utils.loadTestCodecs(conf, 5, 5, 1, 3, "/raid", "/raidrs", false, true);
    conf.setInt("io.bytes.per.checksum", 1);
    excludeFile = new Path(TEST_DIR, "exclude" + System.currentTimeMillis());
    cleanFile(excludeFile);
    conf.set("dfs.hosts.exclude", excludeFile.toUri().getPath());
    writeConfigFile(excludeFile, null);

    if (!simulated) {
      cluster = new MiniDFSCluster(conf, hosts.length, true, racks, hosts);
    } else {
      long[] capacities = new long[] {CAPACITY, CAPACITY, CAPACITY};
      cluster = new MiniDFSCluster(0, conf, hosts.length, true, true, null, racks, capacities);
    }
    cluster.waitActive();
    namesystem = cluster.getNameNode().getNamesystem();
    Assert.assertTrue(
        "BlockPlacementPolicy type is not correct.",
        namesystem.replicator instanceof BlockPlacementPolicyRaid);
    policy = (BlockPlacementPolicyRaid) namesystem.replicator;
    fs = cluster.getFileSystem();
    dfs = (DistributedFileSystem) fs;
    TestDirectoryRaidDfs.setupStripeStore(conf, fs);
  }
Example #20
0
  @Test
  public void testBracketedCounters() throws Exception {
    final Configuration conf = new Configuration();
    final FileSystem lfs = FileSystem.getLocal(conf);

    final Path rootInputDir =
        new Path(System.getProperty("test.tools.input.dir", "")).makeQualified(lfs);
    final Path rootTempDir =
        new Path(System.getProperty("test.build.data", "/tmp")).makeQualified(lfs);

    final Path rootInputPath = new Path(rootInputDir, "rumen/small-trace-test");
    final Path tempDir = new Path(rootTempDir, "TestBracketedCounters");
    lfs.delete(tempDir, true);

    final Path topologyPath = new Path(tempDir, "dispatch-topology.json");
    final Path tracePath = new Path(tempDir, "dispatch-trace.json");

    final Path inputPath = new Path(rootInputPath, "counters-format-test-logs");

    System.out.println("topology result file = " + topologyPath);
    System.out.println("testBracketedCounters() trace result file = " + tracePath);

    final Path goldPath = new Path(rootInputPath, "counters-test-trace.json.gz");

    String[] args = {tracePath.toString(), topologyPath.toString(), inputPath.toString()};

    Tool analyzer = new TraceBuilder();
    int result = ToolRunner.run(analyzer, args);
    assertEquals("Non-zero exit", 0, result);

    TestRumenJobTraces.<LoggedJob>jsonFileMatchesGold(
        conf, tracePath, goldPath, LoggedJob.class, "trace");
  }
Example #21
0
  public static void main(String[] args) throws IOException {
    if (args.length < 1) {
      System.out.println("args: [path] [max-num-of-records-per-file]");
      System.exit(-1);
    }

    String f = args[0];

    int max = Integer.MAX_VALUE;
    if (args.length >= 2) {
      max = Integer.parseInt(args[1]);
    }

    boolean useLocal = args.length >= 3 && args[2].equals("local");

    if (useLocal) {
      System.out.println("Reading from local filesystem");
    }

    FileSystem fs =
        useLocal ? FileSystem.getLocal(new Configuration()) : FileSystem.get(new Configuration());
    Path p = new Path(f);

    if (fs.getFileStatus(p).isDir()) {
      readSequenceFilesInDir(p, fs, max);
    } else {
      readSequenceFile(p, fs, max);
    }
  }
Example #22
0
 private static final FileFragment getFileFragment(String fileName) throws IOException {
   TajoConf conf = new TajoConf();
   Path tablePath = new Path(getResourcePath("dataset", "TestDelimitedTextFile"), fileName);
   FileSystem fs = FileSystem.getLocal(conf);
   FileStatus status = fs.getFileStatus(tablePath);
   return new FileFragment("table", tablePath, 0, status.getLen());
 }
Example #23
0
  public void testAbort() throws IOException {
    JobConf job = new JobConf();
    setConfForFileOutputCommitter(job);
    JobContext jContext = new JobContextImpl(job, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();
    FileOutputFormat.setWorkOutputPath(job, committer.getTempTaskOutputPath(tContext));

    // do setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    String file = "test.txt";

    // A reporter that does nothing
    Reporter reporter = Reporter.NULL;
    // write output
    FileSystem localFs = FileSystem.getLocal(job);
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(localFs, job, file, reporter);
    writeOutput(theRecordWriter, reporter);

    // do abort
    committer.abortTask(tContext);
    File expectedFile =
        new File(new Path(committer.getTempTaskOutputPath(tContext), file).toString());
    assertFalse("task temp dir still exists", expectedFile.exists());

    committer.abortJob(jContext, JobStatus.State.FAILED);
    expectedFile = new File(new Path(outDir, FileOutputCommitter.TEMP_DIR_NAME).toString());
    assertFalse("job temp dir still exists", expectedFile.exists());
    assertEquals("Output directory not empty", 0, new File(outDir.toString()).listFiles().length);
    FileUtil.fullyDelete(new File(outDir.toString()));
  }
 @SuppressWarnings("deprecation")
 private void deleteLocalFiles(String subdir) throws IOException {
   String[] localDirs = getLocalDirs();
   for (int i = 0; i < localDirs.length; i++) {
     FileSystem.getLocal(getConf()).delete(new Path(localDirs[i], subdir));
   }
 }
Example #25
0
  /**
   * Create log directory for the given attempt. This involves creating the following and setting
   * proper permissions for the new directories <br>
   * {hadoop.log.dir}/userlogs/<jobid> <br>
   * {hadoop.log.dir}/userlogs/<jobid>/<attempt-id-as-symlink> <br>
   * {one of the mapred-local-dirs}/userlogs/<jobid> <br>
   * {one of the mapred-local-dirs}/userlogs/<jobid>/<attempt-id>
   *
   * @param taskID attempt-id for which log dir is to be created
   * @param isCleanup Is this attempt a cleanup attempt?
   * @param localDirs mapred local directories
   * @throws IOException
   */
  public static void createTaskAttemptLogDir(
      TaskAttemptID taskID, boolean isCleanup, String[] localDirs) throws IOException {
    String cleanupSuffix = isCleanup ? ".cleanup" : "";
    String strAttemptLogDir = getTaskAttemptLogDir(taskID, cleanupSuffix, localDirs);
    File attemptLogDir = new File(strAttemptLogDir);
    if (!attemptLogDir.mkdirs()) {
      throw new IOException("Creation of " + attemptLogDir + " failed.");
    }
    String strLinkAttemptLogDir =
        getJobDir(taskID.getJobID()).getAbsolutePath()
            + File.separatorChar
            + taskID.toString()
            + cleanupSuffix;
    if (FileUtil.symLink(strAttemptLogDir, strLinkAttemptLogDir) != 0) {
      throw new IOException(
          "Creation of symlink from "
              + strLinkAttemptLogDir
              + " to "
              + strAttemptLogDir
              + " failed.");
    }

    FileSystem localFs = FileSystem.getLocal(new Configuration());
    localFs.setPermission(new Path(attemptLogDir.getPath()), new FsPermission((short) 0700));
  }
  public void testFormat() throws Exception {
    localFs = FileSystem.getLocal(defaultConf);
    localFs.delete(workDir, true);

    Job job = new Job(new Configuration(defaultConf));
    Path file = new Path(workDir, "test.txt");

    int seed = new Random().nextInt();
    Random random = new Random(seed);

    // for a variety of lengths
    for (int length = 0; length < MAX_LENGTH; length += random.nextInt(MAX_LENGTH / 10) + 1) {
      // create a file with length entries
      Writer writer = new OutputStreamWriter(localFs.create(file));
      try {
        MyClass mc = new MyClass();
        for (int i = 0; i < length; i++) {
          mc.s = Integer.toString(i);
          mc.v = i;
          byte[] raw = MessagePack.pack(mc);
          byte[] b64e = base64_.encodeBase64(raw);
          byte[] b64d = base64_.decode(b64e);
          MyClass mc2 = MessagePack.unpack(b64d, mc.getClass());
          assertEquals(mc.s, mc2.s);
          assertEquals(mc.v, mc2.v);

          writer.write(base64_.encodeToString(raw));
        }
      } finally {
        writer.close();
      }
      checkFormat(job);
    }
  }
Example #27
0
    public Job(JobID jobid, String jobSubmitDir) throws IOException {
      this.systemJobDir = new Path(jobSubmitDir);
      this.systemJobFile = new Path(systemJobDir, "job.xml");
      this.id = jobid;

      this.localFs = FileSystem.getLocal(conf);

      this.localJobDir = localFs.makeQualified(conf.getLocalPath(jobDir));
      this.localJobFile = new Path(this.localJobDir, id + ".xml");

      // Manage the distributed cache.  If there are files to be copied,
      // this will trigger localFile to be re-written again.
      this.trackerDistributedCacheManager =
          new TrackerDistributedCacheManager(conf, taskController);
      this.taskDistributedCacheManager =
          trackerDistributedCacheManager.newTaskDistributedCacheManager(jobid, conf);
      taskDistributedCacheManager.setupCache(conf, "archive", "archive");

      if (DistributedCache.getSymlink(conf)) {
        // This is not supported largely because,
        // for a Child subprocess, the cwd in LocalJobRunner
        // is not a fresh slate, but rather the user's working directory.
        // This is further complicated because the logic in
        // setupWorkDir only creates symlinks if there's a jarfile
        // in the configuration.
        LOG.warn("LocalJobRunner does not support " + "symlinking into current working dir.");
      }
      // Setup the symlinks for the distributed cache.
      TaskRunner.setupWorkDir(conf, new File(localJobDir.toUri()).getAbsoluteFile());

      // Write out configuration file.  Instead of copying it from
      // systemJobFile, we re-write it, since setup(), above, may have
      // updated it.
      OutputStream out = localFs.create(localJobFile);
      try {
        conf.writeXml(out);
      } finally {
        out.close();
      }
      this.job = new JobConf(localJobFile);

      // Job (the current object) is a Thread, so we wrap its class loader.
      if (!taskDistributedCacheManager.getClassPaths().isEmpty()) {
        setContextClassLoader(taskDistributedCacheManager.makeClassLoader(getContextClassLoader()));
      }

      profile =
          new JobProfile(
              job.getUser(),
              id,
              systemJobFile.toString(),
              "http://localhost:8080/",
              job.getJobName());
      status = new JobStatus(id, 0.0f, 0.0f, JobStatus.RUNNING);

      jobs.put(id, this);

      this.start();
    }
Example #28
0
 /**
  * If libjars are set in the conf, parse the libjars.
  *
  * @param conf
  * @return libjar urls
  * @throws IOException
  */
 public static URL[] getLibJars(Configuration conf) throws IOException {
   String jars = conf.get("tmpjars");
   if (jars == null) {
     return null;
   }
   String[] files = jars.split(",");
   List<URL> cp = new ArrayList<URL>();
   for (String file : files) {
     Path tmp = new Path(file);
     if (tmp.getFileSystem(conf).equals(FileSystem.getLocal(conf))) {
       cp.add(FileSystem.getLocal(conf).pathToFile(tmp).toURI().toURL());
     } else {
       LOG.warn("The libjars file " + tmp + " is not on the local " + "filesystem. Ignoring.");
     }
   }
   return cp.toArray(new URL[0]);
 }
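A hedged sketch of a typical consumer of the returned URLs (the classloader wiring is an assumption modeled on common Hadoop client code, not taken from the snippet above):

   // Hypothetical consumer: expose the libjars to the current thread's classloader.
   URL[] libJars = getLibJars(conf);
   if (libJars != null && libJars.length > 0) {
     ClassLoader parent = Thread.currentThread().getContextClassLoader();
     Thread.currentThread().setContextClassLoader(new URLClassLoader(libJars, parent));
   }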
Example #29
0
 static {
   try {
     defaultConf.set("fs.default.name", "file:///");
     localFs = FileSystem.getLocal(defaultConf);
   } catch (IOException e) {
     throw new RuntimeException("init failure", e);
   }
 }
 // to be used for testing
 public WikipediaRecordReader(URL fileURL, long start, long end) throws IOException {
   this.start = start;
   this.end = end;
   Path path = new Path("file://", fileURL.getPath());
   fsin = FileSystem.getLocal(new Configuration()).open(path);
   fsin.seek(start);
   fsin.seek(0);
 }