Example #1
  /**
   * Delete the specified snapshot
   *
   * @param snapshot snapshot descriptor; only the name is required, the full description is read from the file system
   * @throws SnapshotDoesNotExistException If the specified snapshot does not exist.
   * @throws IOException For filesystem IOExceptions
   */
  public void deleteSnapshot(SnapshotDescription snapshot)
      throws SnapshotDoesNotExistException, IOException {
    // check to see if it is completed
    if (!isSnapshotCompleted(snapshot)) {
      throw new SnapshotDoesNotExistException(ProtobufUtil.createSnapshotDesc(snapshot));
    }

    String snapshotName = snapshot.getName();
    // locate the completed snapshot directory on the file system
    FileSystem fs = master.getMasterFileSystem().getFileSystem();
    Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
    // Get snapshot info from the file system. The one passed as a parameter is a "fake" snapshotInfo
    // with just the "name"; it does not contain the "real" snapshot information.
    snapshot = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);

    // call coproc pre hook
    MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost();
    if (cpHost != null) {
      cpHost.preDeleteSnapshot(snapshot);
    }

    LOG.debug("Deleting snapshot: " + snapshotName);
    // delete the existing snapshot
    if (!fs.delete(snapshotDir, true)) {
      throw new HBaseSnapshotException("Failed to delete snapshot directory: " + snapshotDir);
    }

    // call coproc post hook
    if (cpHost != null) {
      cpHost.postDeleteSnapshot(snapshot);
    }
  }
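A hedged client-side counterpart to the master-side method above: applications normally trigger snapshot deletion through the public Admin API. The sketch below assumes an HBase 1.x+ client (org.apache.hadoop.hbase.client.ConnectionFactory/Connection/Admin); the snapshot name is illustrative.

  // Minimal sketch; the snapshot name is an assumption, not taken from the example above.
  public static void deleteSnapshotViaAdmin(String snapshotName) throws IOException {
    Configuration conf = HBaseConfiguration.create();
    try (Connection connection = ConnectionFactory.createConnection(conf);
        Admin admin = connection.getAdmin()) {
      // SnapshotDoesNotExistException (an IOException subclass) is thrown if the snapshot is missing
      admin.deleteSnapshot(snapshotName);
    }
  }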
Example #2
 private void createDirIfNotExist(Path path) throws IOException {
   if (!fs.exists(path)) {
     if (!fs.mkdirs(path)) {
       throw new IOException("Unable to create: " + path);
     }
   }
 }
Example #3
  public HdfsDirectory(Path hdfsDirPath, LockFactory lockFactory, Configuration configuration)
      throws IOException {
    super(lockFactory);
    this.hdfsDirPath = hdfsDirPath;
    this.configuration = configuration;
    fileSystem = FileSystem.get(hdfsDirPath.toUri(), configuration);
    fileContext = FileContext.getFileContext(hdfsDirPath.toUri(), configuration);

    if (fileSystem instanceof DistributedFileSystem) {
      // Make sure dfs is not in safe mode
      while (((DistributedFileSystem) fileSystem).setSafeMode(SafeModeAction.SAFEMODE_GET, true)) {
        LOG.warn("The NameNode is in SafeMode - Solr will wait 5 seconds and try again.");
        try {
          Thread.sleep(5000);
        } catch (InterruptedException e) {
          Thread.interrupted();
          // continue
        }
      }
    }

    try {
      if (!fileSystem.exists(hdfsDirPath)) {
        boolean success = fileSystem.mkdirs(hdfsDirPath);
        if (!success) {
          throw new RuntimeException("Could not create directory: " + hdfsDirPath);
        }
      }
    } catch (Exception e) {
      org.apache.solr.common.util.IOUtils.closeQuietly(fileSystem);
      throw new RuntimeException("Problem creating directory: " + hdfsDirPath, e);
    }
  }
Example #4
 private void setUnreadableBySuperuserXattrib(Path p) throws IOException {
   if (fs.getScheme().toLowerCase().contains("hdfs")
       && intermediateEncryptionEnabled
       && !fs.getXAttrs(p).containsKey(UNREADABLE_BY_SUPERUSER_XATTRIB)) {
     fs.setXAttr(p, UNREADABLE_BY_SUPERUSER_XATTRIB, null, EnumSet.of(XAttrSetFlag.CREATE));
   }
 }
Example #5
    @Override
    public void setup(Reducer<IntWritable, Text, NullWritable, NullWritable>.Context context) {
      Configuration conf = context.getConfiguration();
      FileSystem fs;
      try {
        fs = FileSystem.get(conf);
      } catch (Exception e) {
        throw new RuntimeException("Error opening the FileSystem!", e);
      }

      RetrievalEnvironment env = null;
      try {
        env = new RetrievalEnvironment(conf.get(Constants.IndexPath), fs);
      } catch (IOException e) {
        throw new RuntimeException("Unable to create RetrievalEnvironment!", e);
      }

      collectionDocumentCount = env.readCollectionDocumentCount();

      try {
        out = fs.create(new Path(env.getTermDocVectorsForwardIndex()), true);
        out.writeInt(env.readDocnoOffset());
        out.writeInt(collectionDocumentCount);
      } catch (Exception e) {
        throw new RuntimeException("Error in creating files!", e);
      }
    }
Example #6
  public static void run(Configuration conf, Path input, String outputFile)
      throws IOException, InstantiationException, IllegalAccessException {
    Writer writer;
    if (outputFile == null) {
      writer = new OutputStreamWriter(System.out);
    } else {
      writer =
          new OutputStreamWriter(
              new FileOutputStream(new File(outputFile)), Charset.forName("UTF-8"));
    }

    try {
      FileSystem fs = input.getFileSystem(conf);
      for (FileStatus fst : fs.listStatus(input, new DataPathFilter())) {
        Path dataPath = fst.getPath();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, dataPath, conf);
        try {
          Text key = reader.getKeyClass().asSubclass(Text.class).newInstance();
          DocumentMapping value = new DocumentMapping();
          while (reader.next(key, value)) {
            String docId = value.getDocId();
            writer.write(docId + "\t" + key + "\n");
          }
        } finally {
          reader.close();
        }
      }
    } finally {
      writer.close();
    }
  }
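The SequenceFile.Reader(fs, path, conf) constructor used above is deprecated in Hadoop 2.x. Below is a hedged sketch of the same loop against the options-based reader API, using ReflectionUtils instead of Class.newInstance(); the helper name dumpMappings is an assumption.

  // Requires org.apache.hadoop.io.SequenceFile and org.apache.hadoop.util.ReflectionUtils.
  private static void dumpMappings(Configuration conf, Path dataPath, Writer writer)
      throws IOException {
    SequenceFile.Reader reader =
        new SequenceFile.Reader(conf, SequenceFile.Reader.file(dataPath));
    try {
      Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
      DocumentMapping value = new DocumentMapping();
      while (reader.next(key, value)) {
        writer.write(value.getDocId() + "\t" + key + "\n");
      }
    } finally {
      reader.close();
    }
  }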
Example #7
 /*
  * Returns the inference taps, keyed by stream id ("main" when predicate indexing is disabled).
  */
 private Map<String, Tap> getInferencesTap(Scheme scheme) {
   Map<String, Tap> inferencesTap = new HashMap<String, Tap>();
   try {
     String path = null;
     FileSystem fs = FileSystem.get(mConfiguration.hadoopConfiguration);
     if (mConfiguration.doPredicateIndexing) {
       LiteralFields headStream = ruleStreams.getHeadStream();
       path = distributedFileSystemManager.getInferencesPath(headStream);
       if (fs.exists(new Path(path))) {
         inferencesTap.put(headStream.getId().toString(), new Hfs(scheme, path));
       }
       for (LiteralFields fields : ruleStreams.getBodyStreams()) {
         path = distributedFileSystemManager.getInferencesPath(fields);
         if (fs.exists(new Path(path))) {
           inferencesTap.put(fields.getId().toString(), new Hfs(scheme, path));
         }
       }
     } else {
       path = distributedFileSystemManager.getInferencesPath();
       if (fs.exists(new Path(path))) {
         inferencesTap.put("main", new Hfs(scheme, path));
       }
     }
   } catch (IOException e) {
     logger.error("io exception", e);
     throw new RuntimeException("io exception", e);
   }
   return inferencesTap;
 }
Example #8
 public void store(Path output) throws IOException {
   FileSystem fs = output.getFileSystem(HadoopUtils.createConfiguration());
   fs.delete(output, true);
   OutputStream os = fs.create(output, true);
   store(os);
   os.close();
 }
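A hedged variant of store() above using try-with-resources, so the stream is closed even if store(os) throws; fs.create(output, true) already overwrites an existing file, so the explicit delete is only needed if output could be a directory.

  // Minimal sketch; behaviour is intended to match the method above.
  public void store(Path output) throws IOException {
    FileSystem fs = output.getFileSystem(HadoopUtils.createConfiguration());
    try (OutputStream os = fs.create(output, true)) { // overwrite == true replaces an existing file
      store(os);
    }
  }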
Example #9
  @BeforeClass
  public static void setUp() throws Exception {

    Configuration conf = new Configuration();
    conf.set("hadoop.security.auth_to_local", "RULE:[2:$1]");
    dfsCluster = new MiniDFSCluster(conf, numSlaves, true, null);
    jConf = new JobConf(conf);
    mrCluster =
        new MiniMRCluster(
            0,
            0,
            numSlaves,
            dfsCluster.getFileSystem().getUri().toString(),
            1,
            null,
            null,
            null,
            jConf);

    createTokenFileJson();
    verifySecretKeysInJSONFile();
    NameNodeAdapter.getDtSecretManager(dfsCluster.getNamesystem()).startThreads();
    FileSystem fs = dfsCluster.getFileSystem();

    p1 = new Path("file1");
    p2 = new Path("file2");

    p1 = fs.makeQualified(p1);
  }
Example #10
  /**
   * Set up the control file that lists the input files.
   *
   * @return true if at least one file was queued, false otherwise
   * @throws IOException if the control file cannot be written
   */
  private boolean setup() throws IOException {
    estimateSavings();

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobconf);
    Path jobdir = new Path(jClient.getSystemDir(), NAME + "_" + randomId);

    LOG.info(JOB_DIR_LABEL + "=" + jobdir);
    jobconf.set(JOB_DIR_LABEL, jobdir.toString());
    Path log = new Path(jobdir, "_logs");

    // The control file should have small size blocks. This helps
    // in spreading out the load from mappers that will be spawned.
    jobconf.setInt("dfs.block.size", OP_LIST_BLOCK_SIZE);

    FileOutputFormat.setOutputPath(jobconf, log);
    LOG.info("log=" + log);

    // create operation list
    FileSystem fs = jobdir.getFileSystem(jobconf);
    Path opList = new Path(jobdir, "_" + OP_LIST_LABEL);
    jobconf.set(OP_LIST_LABEL, opList.toString());
    int opCount = 0, synCount = 0;
    SequenceFile.Writer opWriter = null;

    try {
      opWriter =
          SequenceFile.createWriter(
              fs, jobconf, opList, Text.class, PolicyInfo.class, SequenceFile.CompressionType.NONE);
      for (RaidPolicyPathPair p : raidPolicyPathPairList) {
        // If a large set of files are Raided for the first time, files
        // in the same directory that tend to have the same size will end up
        // with the same map. This shuffle mixes things up, allowing a better
        // mix of files.
        java.util.Collections.shuffle(p.srcPaths);
        for (FileStatus st : p.srcPaths) {
          opWriter.append(new Text(st.getPath().toString()), p.policy);
          opCount++;
          if (++synCount > SYNC_FILE_MAX) {
            opWriter.sync();
            synCount = 0;
          }
        }
      }

    } finally {
      if (opWriter != null) {
        opWriter.close();
      }
      fs.setReplication(opList, OP_LIST_REPLICATION); // increase replication for control file
    }
    raidPolicyPathPairList.clear();

    jobconf.setInt(OP_COUNT_LABEL, opCount);
    LOG.info("Number of files=" + opCount);
    jobconf.setNumMapTasks(
        getMapCount(opCount, new JobClient(jobconf).getClusterStatus().getTaskTrackers()));
    LOG.info("jobName= " + jobName + " numMapTasks=" + jobconf.getNumMapTasks());
    return opCount != 0;
  }
Example #11
  @Test
  public void testRestrictedRead() throws IOException {
    FileSystemPartitionView<TestRecord> partition0 =
        partitioned.getPartitionView(URI.create("id_hash=0"));
    FileSystemPartitionView<TestRecord> partition1 =
        partitioned.getPartitionView(URI.create("id_hash=1"));
    FileSystemPartitionView<TestRecord> partition2 =
        partitioned.getPartitionView(URI.create("id_hash=2"));
    FileSystemPartitionView<TestRecord> partition3 =
        partitioned.getPartitionView(URI.create("id_hash=3"));

    int count0 = DatasetTestUtilities.materialize(partition0).size();
    int total = DatasetTestUtilities.materialize(partitioned).size();
    Assert.assertTrue("Should read some records", count0 > 0);
    Assert.assertTrue("Should not read the entire dataset", count0 < total);

    // move other partitions so they match the partition0 constraint
    FileSystem local = LocalFileSystem.getInstance();
    local.rename(new Path(partition1.getLocation()), new Path(partitioned.getDirectory(), "0"));
    local.rename(
        new Path(partition2.getLocation()), new Path(partitioned.getDirectory(), "hash=0"));
    local.rename(
        new Path(partition3.getLocation()), new Path(partitioned.getDirectory(), "id_hash=00"));

    int newCount0 = DatasetTestUtilities.materialize(partition0).size();
    Assert.assertEquals("Should match original count", count0, newCount0);

    int countByConstraints =
        DatasetTestUtilities.materialize(partition0.toConstraintsView()).size();
    Assert.assertEquals("Should match total count", total, countByConstraints);
  }
Example #12
  public void testInFlow() throws Exception {
    FileSystem.get(new Configuration()).delete(new Path("/tmp/input"), true);
    FileSystem.get(new Configuration()).delete(new Path("/tmp/output"), true);

    Hfs inTap = new Hfs(new ProtobufScheme("value", Example.Person.class), "/tmp/input");
    TupleEntryCollector collector = inTap.openForWrite(new HadoopFlowProcess());
    collector.add(new TupleEntry(new Fields("value"), new Tuple(BRYAN.build())));
    collector.add(new TupleEntry(new Fields("value"), new Tuple(LUCAS.build())));
    collector.close();

    Pipe inPipe = new Pipe("in");
    Pipe p =
        new Each(
            inPipe,
            new Fields("value"),
            new ExpandProto(Example.Person.class),
            new Fields("id", "name", "email", "position"));

    Hfs sink = new Hfs(new TextLine(), "/tmp/output");
    new HadoopFlowConnector().connect(inTap, sink, p).complete();

    TupleEntryIterator iter = sink.openForRead(new HadoopFlowProcess());
    List<Tuple> results = new ArrayList<Tuple>();
    while (iter.hasNext()) {
      results.add(iter.next().getTupleCopy());
    }
    assertEquals(2, results.size());

    assertEquals(
        new Tuple(0, 1, "bryan", "*****@*****.**", Example.Person.Position.CEO.getNumber())
            .toString(),
        results.get(0).toString());
    assertEquals(new Tuple(25, 2, "lucas", null, null).toString(), results.get(1).toString());
  }
Example #13
 /**
  * Creates the directory if it does not exist.
  *
  * @param directory directory to create
  * @param conf Hadoop Configuration
  * @throws java.io.IOException if the HDFS operation fails
  */
 public static void makeDirectoryIfNotExists(String directory, Configuration conf)
     throws IOException {
   FileSystem fileSystem = FileSystem.get(conf);
   if (!isExist(conf, directory) && !isDirectory(fileSystem, directory)) {
     fileSystem.mkdirs(new Path(directory));
   }
 }
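FileSystem.mkdirs already has mkdir -p semantics and returns true when the directory exists, so a hedged, minimal variant of the helper above can skip the separate existence checks; the method name and the decision to fail with an IOException are assumptions.

  public static void ensureDirectory(String directory, Configuration conf) throws IOException {
    FileSystem fileSystem = FileSystem.get(conf);
    // mkdirs() is a no-op returning true if the directory is already there
    if (!fileSystem.mkdirs(new Path(directory))) {
      throw new IOException("Unable to create directory: " + directory);
    }
  }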
Example #14
  /**
   * Moves the specified files on HDFS to another directory.
   *
   * @param conf Hadoop Configuration
   * @param delayFiles files to move
   * @param targetDirectory target directory
   * @throws java.io.IOException if a file cannot be moved
   */
  public static void moveFilesToDirectory(
      Configuration conf, List<String> delayFiles, String targetDirectory) throws IOException {
    for (String path : delayFiles) {
      String filename = FileUtils.getFilename(path);
      String delayedFilePrefix = filename.split("-")[0];
      String outputHead = delayedFilePrefix.replaceAll("delay", "");
      String outputMiddle = delayedFilePrefix.substring(0, 5); // todo
      String outputTail = filename.replaceAll(delayedFilePrefix, "");

      System.out.println(
          "Acceleration Dir "
              + targetDirectory
              + "/"
              + outputHead
              + "_"
              + outputMiddle
              + outputTail);
      makeDirectoryIfNotExists(targetDirectory, conf);

      FileSystem fileSystem = FileSystem.get(conf);
      fileSystem.rename(
          new Path(path),
          new Path(targetDirectory + "/" + outputHead + "_" + outputMiddle + outputTail));

      System.out.println("\t Moved: '" + path + "' --> '" + targetDirectory + "'");
    }
  }
Example #15
 /** Obtain the owner of the log dir. This is determined by checking the job's log directory. */
 static String obtainLogDirOwner(TaskAttemptID taskid) throws IOException {
   Configuration conf = new Configuration();
   FileSystem raw = FileSystem.getLocal(conf).getRaw();
   Path jobLogDir = new Path(getJobDir(taskid.getJobID()).getAbsolutePath());
   FileStatus jobStat = raw.getFileStatus(jobLogDir);
   return jobStat.getOwner();
 }
Example #16
  @Test
  public void testGetTokensForViewFS() throws IOException, URISyntaxException {
    Configuration conf = new Configuration(jConf);
    FileSystem dfs = dfsCluster.getFileSystem();
    String serviceName = dfs.getCanonicalServiceName();

    Path p1 = new Path("/mount1");
    Path p2 = new Path("/mount2");
    p1 = dfs.makeQualified(p1);
    p2 = dfs.makeQualified(p2);

    conf.set("fs.viewfs.mounttable.default.link./dir1", p1.toString());
    conf.set("fs.viewfs.mounttable.default.link./dir2", p2.toString());
    Credentials credentials = new Credentials();
    Path lp1 = new Path("viewfs:///dir1");
    Path lp2 = new Path("viewfs:///dir2");
    Path[] paths = new Path[2];
    paths[0] = lp1;
    paths[1] = lp2;
    TokenCache.obtainTokensForNamenodesInternal(credentials, paths, conf);

    Collection<Token<? extends TokenIdentifier>> tns = credentials.getAllTokens();
    assertEquals("number of tokens is not 1", 1, tns.size());

    boolean found = false;
    for (Token<? extends TokenIdentifier> tt : tns) {
      System.out.println("token=" + tt);
      if (tt.getKind().equals(DelegationTokenIdentifier.HDFS_DELEGATION_KIND)
          && tt.getService().equals(new Text(serviceName))) {
        found = true;
      }
      assertTrue("didn't find token for [" + lp1 + ", " + lp2 + "]", found);
    }
  }
Example #17
  /**
   * Create log directory for the given attempt. This involves creating the following and setting
   * proper permissions for the new directories <br>
   * {hadoop.log.dir}/userlogs/<jobid> <br>
   * {hadoop.log.dir}/userlogs/<jobid>/<attempt-id-as-symlink> <br>
   * {one of the mapred-local-dirs}/userlogs/<jobid> <br>
   * {one of the mapred-local-dirs}/userlogs/<jobid>/<attempt-id>
   *
   * @param taskID attempt-id for which log dir is to be created
   * @param isCleanup Is this attempt a cleanup attempt ?
   * @param localDirs mapred local directories
   * @throws IOException
   */
  public static void createTaskAttemptLogDir(
      TaskAttemptID taskID, boolean isCleanup, String[] localDirs) throws IOException {
    String cleanupSuffix = isCleanup ? ".cleanup" : "";
    String strAttemptLogDir = getTaskAttemptLogDir(taskID, cleanupSuffix, localDirs);
    File attemptLogDir = new File(strAttemptLogDir);
    if (!attemptLogDir.mkdirs()) {
      throw new IOException("Creation of " + attemptLogDir + " failed.");
    }
    String strLinkAttemptLogDir =
        getJobDir(taskID.getJobID()).getAbsolutePath()
            + File.separatorChar
            + taskID.toString()
            + cleanupSuffix;
    if (FileUtil.symLink(strAttemptLogDir, strLinkAttemptLogDir) != 0) {
      throw new IOException(
          "Creation of symlink from "
              + strLinkAttemptLogDir
              + " to "
              + strAttemptLogDir
              + " failed.");
    }

    FileSystem localFs = FileSystem.getLocal(new Configuration());
    localFs.setPermission(new Path(attemptLogDir.getPath()), new FsPermission((short) 0700));
  }
Example #18
  static {
    try {
      // call newInstance() instead of using a shared instance from a cache
      // to avoid accidentally having it closed by someone else
      FileSystem fs = FileSystem.newInstance(FileSystem.getDefaultUri(CONF), CONF);
      if (!(fs instanceof DistributedFileSystem)) {
        String error =
            "Cannot connect to HDFS. "
                + CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY
                + "("
                + CONF.get(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY)
                + ")"
                + " might be set incorrectly";
        throw new RuntimeException(error);
      }
      DFS = (DistributedFileSystem) fs;
    } catch (IOException e) {
      throw new RuntimeException("couldn't retrieve FileSystem:\n" + e.getMessage(), e);
    }

    SUPPORTS_VOLUME_ID =
        CONF.getBoolean(
            DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED,
            DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED_DEFAULT);
  }
Example #19
  private double[] getSparkModelInfoFromHDFS(Path location, Configuration conf) throws Exception {

    FileSystem fileSystem = FileSystem.get(location.toUri(), conf);
    FileStatus[] files = fileSystem.listStatus(location);

    if (files == null) throw new Exception("Couldn't find Spark Truck ML weights at: " + location);

    ArrayList<Double> modelInfo = new ArrayList<Double>();
    for (FileStatus file : files) {

      if (file.getPath().getName().startsWith("_")) {
        continue;
      }

      InputStream stream = fileSystem.open(file.getPath());
      try {
        StringWriter writer = new StringWriter();
        IOUtils.copy(stream, writer, "UTF-8");
        String raw = writer.toString();
        for (String str : raw.split("\n")) {
          modelInfo.add(Double.valueOf(str));
        }
      } finally {
        stream.close(); // avoid leaking the HDFS input stream
      }
    }

    return Doubles.toArray(modelInfo);
  }
Example #20
  private void writeIndexDescriptors(ETwinIndexDescriptor ETwinIndexDescriptor) throws IOException {
    Configuration conf = getConf();

    FileSystem fs = (new Path(IndexConfig.index.get()).getFileSystem(conf));

    FileStatus[] fileStats = fs.globStatus(new Path(IndexConfig.index.get(), "*"));

    // We write one indexDescriptor per generated index segment.
    // Something to consider: right now it's a straight-up serialized Thrift object.
    // Would it be better to do the LzoBase64Line thing, so that we can apply our tools?
    // or extend the tools?
    for (int i = 0; i < fileStats.length; i++) {
      ETwinIndexDescriptor.setIndexPart(i);
      FileStatus stat = fileStats[i];
      Path idxPath =
          new Path(stat.getPath().getParent(), "_" + stat.getPath().getName() + ".indexmeta");
      FSDataOutputStream os = fs.create(idxPath, true);
      @SuppressWarnings("unchecked")
      ThriftWritable<ETwinIndexDescriptor> writable =
          (ThriftWritable<ETwinIndexDescriptor>)
              ThriftWritable.newInstance(ETwinIndexDescriptor.getClass());
      writable.set(ETwinIndexDescriptor);
      writable.write(os);
      os.close();
    }
  }
Example #21
    public QseqRecordReader(Configuration conf, FileSplit split) throws IOException {
      setConf(conf);
      file = split.getPath();
      start = split.getStart();
      end = start + split.getLength();

      FileSystem fs = file.getFileSystem(conf);
      FSDataInputStream fileIn = fs.open(file);

      CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
      CompressionCodec codec = codecFactory.getCodec(file);

      if (codec == null) // no codec.  Uncompressed file.
      {
        positionAtFirstRecord(fileIn);
        inputStream = fileIn;
      } else { // compressed file
        if (start != 0)
          throw new RuntimeException(
              "Start position for compressed file is not 0! (found " + start + ")");

        inputStream = codec.createInputStream(fileIn);
        end = Long.MAX_VALUE; // read until the end of the file
      }

      lineReader = new LineReader(inputStream);
    }
Example #22
  @Override
  public boolean nextKeyValue() throws IOException, InterruptedException {
    FileSystem fileSystem = FileSystem.get(configuration);

    if (fileSystem.isDirectory(split.getPath())) {
      return false;
    }

    if (fileProcessed) {
      return false;
    }

    int fileLength = (int) split.getLength();
    byte[] result = new byte[fileLength];

    FSDataInputStream inputStream = null;

    try {
      inputStream = fileSystem.open(split.getPath());
      IOUtils.readFully(inputStream, result, 0, fileLength);
      currentValue.set(result, 0, fileLength);
    } finally {
      IOUtils.closeStream(inputStream);
    }
    fileProcessed = true;
    return true;
  }
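For context, nextKeyValue() above reads an entire non-splittable file into a single value. Below is a minimal sketch of the kind of RecordReader such a method typically sits in; the class name, the NullWritable/BytesWritable key-value types, and the field names are assumptions, not taken from the original project.

  import java.io.IOException;
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FSDataInputStream;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.io.BytesWritable;
  import org.apache.hadoop.io.IOUtils;
  import org.apache.hadoop.io.NullWritable;
  import org.apache.hadoop.mapreduce.InputSplit;
  import org.apache.hadoop.mapreduce.RecordReader;
  import org.apache.hadoop.mapreduce.TaskAttemptContext;
  import org.apache.hadoop.mapreduce.lib.input.FileSplit;

  public class WholeFileRecordReader extends RecordReader<NullWritable, BytesWritable> {
    private FileSplit split;
    private Configuration configuration;
    private final BytesWritable currentValue = new BytesWritable();
    private boolean fileProcessed = false;

    @Override
    public void initialize(InputSplit genericSplit, TaskAttemptContext context) {
      this.split = (FileSplit) genericSplit;
      this.configuration = context.getConfiguration();
    }

    @Override
    public boolean nextKeyValue() throws IOException {
      if (fileProcessed) {
        return false; // the single record has already been emitted
      }
      int fileLength = (int) split.getLength();
      byte[] result = new byte[fileLength];
      FileSystem fileSystem = split.getPath().getFileSystem(configuration);
      FSDataInputStream inputStream = null;
      try {
        inputStream = fileSystem.open(split.getPath());
        IOUtils.readFully(inputStream, result, 0, fileLength);
        currentValue.set(result, 0, fileLength);
      } finally {
        IOUtils.closeStream(inputStream);
      }
      fileProcessed = true;
      return true;
    }

    @Override
    public NullWritable getCurrentKey() {
      return NullWritable.get();
    }

    @Override
    public BytesWritable getCurrentValue() {
      return currentValue;
    }

    @Override
    public float getProgress() {
      return fileProcessed ? 1.0f : 0.0f;
    }

    @Override
    public void close() {
      // nothing to close; the stream is closed in nextKeyValue()
    }
  }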
Example #23
  @Test
  public void test() throws Exception {
    Connector c = getConnector();
    // make a table
    String tableName = getUniqueNames(1)[0];
    c.tableOperations().create(tableName);
    // write to it
    BatchWriter bw = c.createBatchWriter(tableName, new BatchWriterConfig());
    Mutation m = new Mutation("row");
    m.put("cf", "cq", "value");
    bw.addMutation(m);
    bw.close();

    // create a fake _tmp file in its directory
    String id = c.tableOperations().tableIdMap().get(tableName);
    FileSystem fs = getCluster().getFileSystem();
    Path tmp = new Path("/accumulo/tables/" + id + "/default_tablet/junk.rf_tmp");
    fs.create(tmp).close();
    for (ProcessReference tserver : getCluster().getProcesses().get(ServerType.TABLET_SERVER)) {
      getCluster().killProcess(ServerType.TABLET_SERVER, tserver);
    }
    getCluster().start();

    Scanner scanner = c.createScanner(tableName, Authorizations.EMPTY);
    FunctionalTestUtils.count(scanner);
    assertFalse(fs.exists(tmp));
  }
Example #24
  public Path write(Message... messages) throws Exception {

    synchronized (WriteUsingMR.class) {
      outputPath = TestUtils.someTemporaryFilePath();

      Path inputPath = TestUtils.someTemporaryFilePath();
      FileSystem fileSystem = inputPath.getFileSystem(conf);
      fileSystem.create(inputPath).close(); // close immediately; the file only needs to exist

      inputMessages = Collections.unmodifiableList(Arrays.asList(messages));

      final Job job = new Job(conf, "write");

      // input not really used
      TextInputFormat.addInputPath(job, inputPath);
      job.setInputFormatClass(TextInputFormat.class);

      job.setMapperClass(WritingMapper.class);
      job.setNumReduceTasks(0);

      job.setOutputFormatClass(ProtoParquetOutputFormat.class);
      ProtoParquetOutputFormat.setOutputPath(job, outputPath);
      ProtoParquetOutputFormat.setProtobufClass(job, TestUtils.inferRecordsClass(messages));

      waitForJob(job);

      inputMessages = null;
      return outputPath;
    }
  }
Example #25
  public boolean refresh(final Path path) throws IOException {
    try (FileSystem fs = path.getFileSystem(new Configuration())) {
      if (_fileStatus.isPresent()) {
        Optional<FileStatus> oldStatus = this._fileStatus;
        try {
          Optional<FileStatus> newStatus = Optional.of(fs.getFileStatus(path));
          this._fileStatus = newStatus;
          this.exists = newStatus.isPresent();

          return (oldStatus.isPresent() != newStatus.isPresent()
              || oldStatus.get().getModificationTime() != newStatus.get().getModificationTime()
              || oldStatus.get().isDirectory() != newStatus.get().isDirectory()
              || oldStatus.get().getLen() != newStatus.get().getLen());
        } catch (FileNotFoundException e) {
          _fileStatus = Optional.absent();
          this.exists = false;
          return true;
        }
      } else {
        if (fs.exists(path)) { // reuse the FileSystem opened above
          _fileStatus = Optional.of(fs.getFileStatus(path));
          return true;
        } else {
          return false;
        }
      }
    }
  }
Example #26
  public static void readHiveResult(String path, OutputStreamWriter outStream, Configuration conf)
      throws IOException {
    FileSystem fs = FileSystem.get(conf);
    Path dir = new Path(path);
    if (!fs.exists(dir)) {
      throw new IOException("cannot find path: " + path);
    }
    FileStatus[] filelist = fs.listStatus(dir);

    long bytesRead = 0L;
    long maxsize = 1024L * 1024 * 1024 * 10;

    for (FileStatus f : filelist) {
      if (!f.isDir() && !f.getPath().getName().startsWith("_")) {
        FSDataInputStream in = fs.open(f.getPath());
        BufferedReader bf = new BufferedReader(new InputStreamReader(in));
        String line;
        while ((line = bf.readLine()) != null) {
          bytesRead += line.getBytes().length;
          outStream.write(line.replaceAll("\001", ",").replaceAll("\t", ","));
          outStream.write("\r\n");
          if (bytesRead >= maxsize) {
            bf.close();
            in.close();
            return;
          }
        }
        bf.close();
        in.close();
      }
    }
    return;
  }
Example #27
 @Test
 public void testNonDefaultFS() throws IOException {
   FileSystem fs = cluster.getFileSystem();
   Configuration conf = fs.getConf();
   conf.set(DFSConfigKeys.FS_DEFAULT_NAME_KEY, fs.getUri().toString());
   TestTrash.trashNonDefaultFS(conf);
 }
Example #28
  @Override
  public void initialize(InputSplit genericSplit, TaskAttemptContext taskAttemptContext)
      throws IOException {
    context = taskAttemptContext;
    FileSplit fileSplit = (FileSplit) genericSplit;
    lzoFile = fileSplit.getPath();
    // The LzoSplitInputFormat is not splittable, so the split length is the whole file.
    totalFileSize = fileSplit.getLength();

    // Jump through some hoops to create the lzo codec.
    Configuration conf = CompatibilityUtil.getConfiguration(context);
    CompressionCodecFactory factory = new CompressionCodecFactory(conf);
    CompressionCodec codec = factory.getCodec(lzoFile);
    ((Configurable) codec).setConf(conf);

    LzopDecompressor lzopDecompressor = (LzopDecompressor) codec.createDecompressor();
    FileSystem fs = lzoFile.getFileSystem(conf);
    rawInputStream = fs.open(lzoFile);

    // Creating the LzopInputStream here just reads the lzo header for us, nothing more.
    // We do the rest of our input off of the raw stream ourselves.
    codec.createInputStream(rawInputStream, lzopDecompressor);

    // This must be called AFTER createInputStream is called, because createInputStream
    // is what reads the header, which has the checksum information.  Otherwise getChecksumsCount
    // erroneously returns zero, and all block offsets will be wrong.
    numCompressedChecksums = lzopDecompressor.getCompressedChecksumsCount();
    numDecompressedChecksums = lzopDecompressor.getDecompressedChecksumsCount();
  }
Example #29
  @Test(timeout = 2000)
  public void testCleanupQueueClosesFilesystem()
      throws IOException, InterruptedException, NoSuchFieldException, IllegalAccessException {
    Configuration conf = new Configuration();
    File file = new File("afile.txt");
    file.createNewFile();
    Path path = new Path(file.getAbsoluteFile().toURI());

    FileSystem.get(conf);
    Assert.assertEquals(1, getFileSystemCacheSize());

    // With UGI, should close FileSystem
    CleanupQueue cleanupQueue = new CleanupQueue();
    PathDeletionContext context =
        new PathDeletionContext(path, conf, UserGroupInformation.getLoginUser(), null, null);
    cleanupQueue.addToQueue(context);

    while (getFileSystemCacheSize() > 0) {
      Thread.sleep(100);
    }

    file.createNewFile();
    FileSystem.get(conf);
    Assert.assertEquals(1, getFileSystemCacheSize());

    // Without UGI, should not close FileSystem
    context = new PathDeletionContext(path, conf);
    cleanupQueue.addToQueue(context);

    while (file.exists()) {
      Thread.sleep(100);
    }
    Assert.assertEquals(1, getFileSystemCacheSize());
  }
Example #30
  /**
   * Check duplicated tweet IDs in <b>tweetIdDir</b>, and output the duplicates to stdout.
   *
   * @param tweetIdDir directory containing the tweet ID files
   * @throws Exception if the directory cannot be read
   */
  public static void checkTidDuplicates(String tweetIdDir) throws Exception {
    // First change path strings to URI strings starting with 'file:' or 'hdfs:'
    tweetIdDir = MultiFileFolderWriter.getUriStrForPath(tweetIdDir);

    Set<String> tidSet = new HashSet<String>();
    Configuration conf = HBaseConfiguration.create();
    FileSystem fs = FileSystem.get(new URI(tweetIdDir), conf);
    int dupCount = 0;
    for (FileStatus srcFileStatus : fs.listStatus(new Path(tweetIdDir))) {
      String srcFileName = srcFileStatus.getPath().getName();
      if (srcFileName.endsWith(".txt") && srcFileName.contains("tweetIds")) {
        BufferedReader brTid =
            new BufferedReader(new InputStreamReader(fs.open(srcFileStatus.getPath())));
        String tid = brTid.readLine();
        while (tid != null) {
          if (tidSet.contains(tid)) {
            System.out.println("Duplicated tweet ID: " + tid);
            dupCount++;
          } else {
            tidSet.add(tid);
          }
          tid = brTid.readLine();
        }
        brTid.close();
      }
    }
    System.out.println(
        "Number of unique tweet IDs: " + tidSet.size() + ", number of duplicates: " + dupCount);
  }