private void init(Counters.Counter readsCounter) throws IOException {
  if (reader == null) {
    FSDataInputStream in = fs.open(file);
    in.seek(segmentOffset);
    reader = new Reader<K, V>(conf, in, segmentLength, codec, readsCounter);
  }
}
private static boolean checkFiles(
    FileSystem fs, String topdir, MyFile[] files, boolean existingOnly) throws IOException {
  Path root = new Path(topdir);

  for (int idx = 0; idx < files.length; idx++) {
    Path fPath = new Path(root, files[idx].getName());
    try {
      fs.getFileStatus(fPath);
      FSDataInputStream in = fs.open(fPath);
      byte[] toRead = new byte[files[idx].getSize()];
      byte[] toCompare = new byte[files[idx].getSize()];
      Random rb = new Random(files[idx].getSeed());
      rb.nextBytes(toCompare);
      assertEquals("Cannot read file.", toRead.length, in.read(toRead));
      in.close();
      for (int i = 0; i < toRead.length; i++) {
        if (toRead[i] != toCompare[i]) {
          return false;
        }
      }
      toRead = null;
      toCompare = null;
    } catch (FileNotFoundException fnfe) {
      if (!existingOnly) {
        throw fnfe;
      }
    }
  }

  return true;
}
@Test
public void testHDFS() {
  Path file = new Path(hdfsURI + hdPath);
  org.apache.hadoop.fs.Path result = new org.apache.hadoop.fs.Path(hdfsURI + "/result");
  try {
    FileSystem fs = file.getFileSystem();
    Assert.assertTrue("Must be HadoopFileSystem", fs instanceof HadoopFileSystem);

    DopOneTestEnvironment.setAsContext();
    try {
      WordCount.main(new String[] {file.toString(), result.toString()});
    } catch (Throwable t) {
      t.printStackTrace();
      Assert.fail("Test failed with " + t.getMessage());
    } finally {
      DopOneTestEnvironment.unsetAsContext();
    }

    Assert.assertTrue("No result file present", hdfs.exists(result));

    // validate output:
    org.apache.hadoop.fs.FSDataInputStream inStream = hdfs.open(result);
    StringWriter writer = new StringWriter();
    IOUtils.copy(inStream, writer);
    String resultString = writer.toString();

    Assert.assertEquals("hdfs 10\nhello 10\n", resultString);
    inStream.close();
  } catch (IOException e) {
    e.printStackTrace();
    Assert.fail("Error in test: " + e.getMessage());
  }
}
/**
 * Copies a file from DFS to the local working directory.
 *
 * @param dfsPath the path to a file in DFS
 * @return the path of the new file in local scratch space
 * @throws IOException if it can't access the files
 */
private String copyDBFile(String dfsPath) throws IOException {
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path filenamePath = new Path(dfsPath);
  File localFile = new File(tmpDirFile, filenamePath.getName());

  if (!fs.exists(filenamePath)) {
    throw new IOException("file not found: " + dfsPath);
  }

  FSDataInputStream in = fs.open(filenamePath);
  BufferedReader d = new BufferedReader(new InputStreamReader(in));
  BufferedWriter out = new BufferedWriter(new FileWriter(localFile.getPath()));
  String line;
  line = d.readLine();
  while (line != null) {
    out.write(line + "\n");
    line = d.readLine();
  }
  in.close();
  out.close();
  return localFile.getPath();
}
public static NaiveBayesModel materialize(Path output, Configuration conf) throws IOException {
  FileSystem fs = output.getFileSystem(conf);

  Vector weightsPerLabel = null;
  Vector perLabelThetaNormalizer = null;
  Vector weightsPerFeature = null;
  Matrix weightsPerLabelAndFeature;
  float alphaI;

  FSDataInputStream in = fs.open(new Path(output, "naiveBayesModel.bin"));
  try {
    alphaI = in.readFloat();
    weightsPerFeature = VectorWritable.readVector(in);
    weightsPerLabel = VectorWritable.readVector(in);
    perLabelThetaNormalizer = VectorWritable.readVector(in);

    weightsPerLabelAndFeature = new SparseMatrix(weightsPerLabel.size(), weightsPerFeature.size());
    for (int label = 0; label < weightsPerLabelAndFeature.numRows(); label++) {
      weightsPerLabelAndFeature.assignRow(label, VectorWritable.readVector(in));
    }
  } finally {
    Closeables.closeQuietly(in);
  }

  NaiveBayesModel model = new NaiveBayesModel(
      weightsPerLabelAndFeature, weightsPerFeature, weightsPerLabel, perLabelThetaNormalizer, alphaI);
  model.validate();
  return model;
}
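// A minimal usage sketch for materialize() above; the model path is hypothetical
// and assumes a model was previously trained and written to that directory.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class MaterializeDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // load the serialized model back into memory
    NaiveBayesModel model = NaiveBayesModel.materialize(new Path("/tmp/naive-bayes-model"), conf);
    System.out.println("model loaded: " + model);
  }
}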
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
  FileSplit split = (FileSplit) genericSplit;
  Configuration job = context.getConfiguration();
  m_Sb.setLength(0);
  m_Start = split.getStart();
  m_End = m_Start + split.getLength();
  final Path file = split.getPath();
  compressionCodecs = new CompressionCodecFactory(job);
  final CompressionCodec codec = compressionCodecs.getCodec(file);

  // open the file and seek to the start of the split
  FileSystem fs = file.getFileSystem(job);
  // getFileStatus().getLen() replaces the deprecated fs.getLength(file)
  long length = fs.getFileStatus(file).getLen();
  FSDataInputStream fileIn = fs.open(split.getPath());
  if (m_Start > 0) {
    fileIn.seek(m_Start);
  }
  if (codec != null) {
    // a compressed file cannot be split, so read it to the end
    CompressionInputStream inputStream = codec.createInputStream(fileIn);
    m_Input = new BufferedReader(new InputStreamReader(inputStream));
    m_End = length;
  } else {
    m_Input = new BufferedReader(new InputStreamReader(fileIn));
  }
  m_Current = m_Start;
  m_Key = split.getPath().getName();
}
public static void main(String[] args) throws IOException {
  String uri = args[0];
  Configuration configuration = new Configuration();
  System.out.println("Trying to get the file system object");
  URI uriObj = URI.create(uri);
  System.out.println("Got URI object " + uri);
  FileSystem fs = FileSystem.get(uriObj, configuration);
  FSDataInputStream fsDataInputStream = null;
  Path hdfsPath = new Path(uri);
  fsDataInputStream = fs.open(hdfsPath);
  // This specifies that reading starts from the 0th byte.
  fsDataInputStream.seek(0);
  IOUtils.copyBytes(fsDataInputStream, System.out, 4096, false);
  System.out.println("*******************************************");

  BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(hdfsPath)));
  try {
    String line;
    line = br.readLine();
    while (line != null) {
      System.out.println("################ Line is###### " + line);
      // be sure to read the next line, otherwise you'll get an infinite loop
      line = br.readLine();
    }
  } finally {
    // close the BufferedReader
    br.close();
  }
}
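// A hedged sketch of the positioned-read API that FSDataInputStream also offers
// (via the PositionedReadable interface): it reads at an absolute offset without
// disturbing the stream's seek pointer. The path argument and the 16-byte header
// are assumptions for illustration.
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PositionedReadDemo {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    try (FSDataInputStream in = fs.open(new Path(args[0]))) {
      byte[] header = new byte[16];
      in.readFully(0L, header); // absolute offset; the main file pointer is unchanged
      System.out.println(new String(header, StandardCharsets.UTF_8));
    }
  }
}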
public PrefixEncodedGlobalStatsWithIndex(Path prefixSetPath, FileSystem fs) throws IOException {
  fileSys = fs;
  FSDataInputStream termsInput = fileSys.open(prefixSetPath);
  prefixSet.readFields(termsInput);
  termsInput.close();
}
private String getJobSummary(FileContext fc, Path path) throws IOException {
  Path qPath = fc.makeQualified(path);
  FSDataInputStream in = fc.open(qPath);
  String jobSummaryString = in.readUTF();
  in.close();
  return jobSummaryString;
}
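// Companion sketch (assumed writer side): readUTF() above only decodes data that
// was produced by DataOutput.writeUTF(), which prefixes the string with a 2-byte
// length and caps it at 65535 encoded bytes. The `fs` handle is an assumption.
try (FSDataOutputStream out = fs.create(qPath)) {
  out.writeUTF(jobSummaryString); // modified UTF-8 with a 2-byte length prefix
}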
@Override
public boolean next(LongWritable key, ProtobufWritable<M> value) throws IOException {
  if (pos_ > end_ || fileIn_.available() <= 0) {
    return false;
  }

  key.set(pos_);
  if (Protobufs.KNOWN_GOOD_POSITION_MARKER.length + pos_ <= end_) {
    fileIn_.skipBytes(Protobufs.KNOWN_GOOD_POSITION_MARKER.length);
  }
  pos_ = fileIn_.getPos();

  try {
    if (protoBuilder == null) {
      protoBuilder = Protobufs.getMessageBuilder(typeRef_.getRawClass());
    }
    Message.Builder builder = protoBuilder.clone();
    final boolean success = builder.mergeDelimitedFrom(fileIn_);
    if (success) {
      value.set((M) builder.build());
    }
    return success;
  } catch (InvalidProtocolBufferException e) {
    LOG.error("Invalid Protobuf exception while building " + typeRef_.getRawClass().getName(), e);
  } catch (UninitializedMessageException ume) {
    LOG.error("Uninitialized Message Exception while building " + typeRef_.getRawClass().getName(), ume);
  }
  return false;
}
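// Companion sketch (assumed writer side): mergeDelimitedFrom() above consumes the
// standard protobuf length-delimited framing, which a producer emits like this
// (the `fs`, `path`, and `messages` names are assumptions):
try (FSDataOutputStream out = fs.create(path)) {
  for (Message m : messages) {
    m.writeDelimitedTo(out); // varint length prefix followed by the message bytes
  }
}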
private void readData(String path, int length, int start, int SIZE, float[] datas, FileSystem fs) {
  try {
    FSDataInputStream fileIn = fs.open(new Path(path));
    byte[] datafully = new byte[datas.length * 4];
    // positioned readFully fills the whole buffer; a bare positional read()
    // may return fewer bytes than requested
    fileIn.readFully(length * start * SIZE, datafully, 0, datafully.length);
    fileIn.close();
    // decode each big-endian 4-byte group into a float
    for (int i = 0; i < datas.length; i++) {
      datas[i] = Float.intBitsToFloat(getInt(datafully, i * 4));
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
}
public MutilCharRecordReader(FileSplit inputSplit, Configuration job) throws IOException {
  maxLineLength = job.getInt("mapred.mutilCharRecordReader.maxlength", Integer.MAX_VALUE);
  start = inputSplit.getStart();
  end = start + inputSplit.getLength();
  final Path file = inputSplit.getPath();
  compressionCodecs = new CompressionCodecFactory(job);
  final CompressionCodec codec = compressionCodecs.getCodec(file);
  // open the file system
  FileSystem fs = file.getFileSystem(job);
  FSDataInputStream fileIn = fs.open(file);
  boolean skipFirstLine = false;
  if (codec != null) {
    lineReader = new LineReader(codec.createInputStream(fileIn), job);
    end = Long.MAX_VALUE;
  } else {
    if (start != 0) {
      // Every split except the first starts mid-line; back up one byte and skip
      // the partial first line, which the previous split reads past its own end.
      skipFirstLine = true;
      --start;
      fileIn.seek(start);
    }
    lineReader = new LineReader(fileIn, job);
  }
  if (skipFirstLine) {
    start += lineReader.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
  }
  this.pos = start;
}
@Override
public ModelInput<StringBuilder> createInput(
    Class<? extends StringBuilder> dataType,
    FileSystem fileSystem,
    Path path,
    long offset,
    long fragmentSize,
    Counter counter) throws IOException, InterruptedException {
  FileSystem fs = FileSystem.get(path.toUri(), getConf());
  FSDataInputStream in = fs.open(path);
  boolean succeed = false;
  try {
    in.seek(offset);
    ModelInput<StringBuilder> result = format.createInput(
        dataType, path.toString(), new CountInputStream(in, counter), offset, fragmentSize);
    succeed = true;
    return result;
  } finally {
    if (!succeed) {
      in.close();
    }
  }
}
public static void main(String args[]) throws Exception {
  if (args.length != 2) {
    System.err.println("arguments: input-dir output-file");
    System.exit(1);
  }

  FileSystem fs = FileSystem.get(confHadoop);
  Path inPath = new Path(args[0]);
  Path outPath = new Path(args[1] + "/dataset");
  FSDataInputStream in = null;
  SequenceFile.Writer writer = null;
  List<Path> files = listFiles(inPath, jpegFilter);
  try {
    writer = SequenceFile.createWriter(fs, confHadoop, outPath, Text.class, BytesWritable.class);
    for (Path p : files) {
      in = fs.open(p);
      // size the buffer from the file status; available() only reports what can
      // be read without blocking, not the file length
      byte buffer[] = new byte[(int) fs.getFileStatus(p).getLen()];
      in.readFully(buffer);
      writer.append(new Text(p.getName()), new BytesWritable(buffer));
      in.close();
    }
  } finally {
    IOUtils.closeStream(writer);
  }
}
public static ProcedureWALTrailer readTrailer(FSDataInputStream stream, long startPos, long size)
    throws IOException {
  // Beginning of the trailer: 17 = 1 byte version + 8 byte magic + 8 byte offset
  long trailerPos = size - 17;

  if (trailerPos < startPos) {
    throw new InvalidWALDataException("Missing trailer: size=" + size + " startPos=" + startPos);
  }

  stream.seek(trailerPos);
  int version = stream.read();
  if (version != TRAILER_VERSION) {
    throw new InvalidWALDataException(
        "Invalid Trailer version. got " + version + " expected " + TRAILER_VERSION);
  }

  long magic = StreamUtils.readLong(stream);
  if (magic != TRAILER_MAGIC) {
    throw new InvalidWALDataException(
        "Invalid Trailer magic. got " + magic + " expected " + TRAILER_MAGIC);
  }

  long trailerOffset = StreamUtils.readLong(stream);
  stream.seek(trailerOffset);

  ProcedureWALEntry entry = readEntry(stream);
  if (entry.getType() != ProcedureWALEntry.Type.PROCEDURE_WAL_EOF) {
    throw new InvalidWALDataException("Invalid Trailer begin");
  }

  ProcedureWALTrailer trailer = ProcedureWALTrailer.newBuilder()
      .setVersion(version)
      .setTrackerPos(stream.getPos())
      .build();
  return trailer;
}
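// Hedged companion sketch of the writer side implied by the reader above: the
// trailer occupies the last 17 bytes of the file, laid out as
//   [EOF entry ...][1-byte version][8-byte TRAILER_MAGIC][8-byte offset of the EOF entry]
// (the `out` stream, `eofEntry`, and writeEntry helper are assumptions):
long trackerPos = out.getPos();            // where the PROCEDURE_WAL_EOF entry starts
writeEntry(out, eofEntry);                 // terminal PROCEDURE_WAL_EOF entry
out.write(TRAILER_VERSION);                // 1 byte
StreamUtils.writeLong(out, TRAILER_MAGIC); // 8 bytes
StreamUtils.writeLong(out, trackerPos);    // 8 bytes pointing back at the EOF entry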
public static void readHiveResult(String path, OutputStreamWriter outStream, Configuration conf)
    throws IOException {
  FileSystem fs = FileSystem.get(conf);
  Path dir = new Path(path);
  if (!fs.exists(dir)) {
    throw new IOException("cannot find path: " + path);
  }
  FileStatus[] filelist = fs.listStatus(dir);
  long bytesRead = 0L;
  long maxsize = 1024L * 1024 * 1024 * 10;
  for (FileStatus f : filelist) {
    if (!f.isDir() && !f.getPath().getName().startsWith("_")) {
      FSDataInputStream in = fs.open(f.getPath());
      BufferedReader bf = new BufferedReader(new InputStreamReader(in));
      String line;
      while ((line = bf.readLine()) != null) {
        bytesRead += line.getBytes().length;
        outStream.write(line.replaceAll("\001", ",").replaceAll("\t", ","));
        outStream.write("\r\n");
        if (bytesRead >= maxsize) {
          bf.close();
          in.close();
          return;
        }
      }
      bf.close();
      in.close();
    }
  }
}
private boolean readUntilMatch(byte[] match, boolean withinBlock) throws IOException {
  int i = 0;
  while (true) {
    int b = fsin.read();
    // end of file:
    if (b == -1) {
      return false;
    }
    // save to buffer:
    if (withinBlock) {
      buffer.write(b);
    }
    // check if we're matching:
    if (b == match[i]) {
      i++;
      if (i >= match.length) {
        return true;
      }
    } else {
      i = 0;
    }
    // see if we've passed the stop point:
    if (!withinBlock && i == 0 && fsin.getPos() >= end) {
      return false;
    }
  }
}
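// Hedged usage sketch: readUntilMatch() above is the scanning primitive used by
// XML-style block record readers. With hypothetical start/end tags, one record
// is extracted roughly like this (buffer is the reader's ByteArrayOutputStream):
byte[] startTag = "<page>".getBytes(StandardCharsets.UTF_8);
byte[] endTag = "</page>".getBytes(StandardCharsets.UTF_8);
if (readUntilMatch(startTag, false)) {   // skip ahead to the next opening tag
  buffer.write(startTag);                // keep the tag itself in the record
  if (readUntilMatch(endTag, true)) {    // buffer everything up to the closing tag
    String record = buffer.toString("UTF-8");
    buffer.reset();
  }
}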
public FSDataInputStream open(Path file, int bufferSize) throws IOException {
  FTPClient client = connect();
  Path workDir = new Path(client.printWorkingDirectory());
  Path absolute = makeAbsolute(workDir, file);
  FileStatus fileStat = getFileStatus(client, absolute);
  if (fileStat.isDirectory()) {
    disconnect(client);
    throw new IOException("Path " + file + " is a directory.");
  }
  client.allocate(bufferSize);
  Path parent = absolute.getParent();
  // Change to the parent directory on the server. Only then can we read the file
  // on the server by opening an InputStream. As a side effect the working
  // directory on the server is changed to the parent directory of the file.
  // The FTP client connection is closed when close() is called on the
  // FSDataInputStream.
  client.changeWorkingDirectory(parent.toUri().getPath());
  InputStream is = client.retrieveFileStream(file.getName());
  FSDataInputStream fis = new FSDataInputStream(new FTPInputStream(is, client, statistics));
  if (!FTPReply.isPositivePreliminary(client.getReplyCode())) {
    // The ftpClient is in an inconsistent state. Must close the stream,
    // which in turn will log out and disconnect from the FTP server.
    fis.close();
    throw new IOException("Unable to open file: " + file + ", Aborting");
  }
  return fis;
}
static long readHdfsFile(FileSystem fs, Path p, long length, Boolean dropBehind) throws Exception {
  FSDataInputStream fis = null;
  long totalRead = 0;
  try {
    fis = fs.open(p);
    if (dropBehind != null) {
      fis.setDropBehind(dropBehind);
    }
    byte buf[] = new byte[8196];
    while (length > 0) {
      int amt = (length > buf.length) ? buf.length : (int) length;
      int ret = fis.read(buf, 0, amt);
      if (ret == -1) {
        return totalRead;
      }
      totalRead += ret;
      length -= ret;
    }
    // the requested number of bytes was read in full; without this return the
    // success path would fall through to the exception below
    return totalRead;
  } catch (IOException e) {
    LOG.error("ioexception", e);
  } finally {
    if (fis != null) {
      fis.close();
    }
  }
  throw new RuntimeException("unreachable");
}
@Test(timeout = 120000)
public void testSeekAfterSetDropBehind() throws Exception {
  // start a cluster
  LOG.info("testSeekAfterSetDropBehind");
  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = null;
  String TEST_PATH = "/test";
  int TEST_PATH_LEN = MAX_TEST_FILE_LEN;
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    cluster.waitActive();
    FileSystem fs = cluster.getFileSystem();
    createHdfsFile(fs, new Path(TEST_PATH), TEST_PATH_LEN, false);
    // verify that we can seek after setDropBehind
    FSDataInputStream fis = fs.open(new Path(TEST_PATH));
    try {
      Assert.assertTrue(fis.read() != -1); // create BlockReader
      fis.setDropBehind(false); // clear BlockReader
      fis.seek(2); // seek
    } finally {
      fis.close();
    }
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
public void readFile(String file) throws IOException {
  Configuration conf = new Configuration();
  conf.addResource(new Path("/opt/hadoop-0.20.0/conf/core-site.xml"));

  FileSystem fileSystem = FileSystem.get(conf);

  Path path = new Path(file);
  if (!fileSystem.exists(path)) {
    System.out.println("File " + file + " does not exist");
    return;
  }

  FSDataInputStream in = fileSystem.open(path);

  String filename = file.substring(file.lastIndexOf('/') + 1, file.length());
  OutputStream out = new BufferedOutputStream(new FileOutputStream(new File(filename)));

  byte[] b = new byte[1024];
  int numBytes = 0;
  while ((numBytes = in.read(b)) > 0) {
    out.write(b, 0, numBytes);
  }

  in.close();
  out.close();
  // note: this closes the shared, cached FileSystem instance for this URI;
  // avoid it if other code in the process still uses the same FileSystem
  fileSystem.close();
}
public static void downloadHdfs(String srcfilePath, String destFilePath) {
  try {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(srcfilePath), conf);
    FSDataInputStream hdfsInStream = fs.open(new Path(srcfilePath));
    File dstFile = new File(destFilePath);
    if (!dstFile.getParentFile().exists()) {
      dstFile.getParentFile().mkdirs();
    }
    OutputStream out = new FileOutputStream(destFilePath);
    byte[] ioBuffer = new byte[1024];
    int readLen = hdfsInStream.read(ioBuffer);
    while (-1 != readLen) {
      out.write(ioBuffer, 0, readLen);
      readLen = hdfsInStream.read(ioBuffer);
    }
    out.close();
    hdfsInStream.close();
    fs.close();
  } catch (FileNotFoundException e) {
    LOG.error("[downloadHdfs]", e);
  } catch (IOException e) {
    LOG.error("[downloadHdfs]", e);
  }
}
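// Hedged alternative: when no custom buffering is needed, the same download can
// usually be expressed with the built-in FileSystem helper in one call:
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(URI.create(srcfilePath), conf);
fs.copyToLocalFile(new Path(srcfilePath), new Path(destFilePath));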
public static void createCentersSequenceFile(
    Configuration conf, FileSystem fs, String centroidsPath, String sequenceFilePath)
    throws Exception {
  Path seqFile = new Path(sequenceFilePath);
  if (fs.exists(seqFile)) {
    fs.delete(seqFile, true);
  }
  FSDataInputStream inputStream = fs.open(new Path(centroidsPath));
  // BufferedReader replaces the deprecated FSDataInputStream.readLine() and the
  // fragile available()-based loop condition
  BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream));
  SequenceFile.Writer writer =
      SequenceFile.createWriter(fs, conf, seqFile, Centroid.class, IntWritable.class);
  IntWritable value = new IntWritable(0);
  String line;
  while ((line = reader.readLine()) != null) {
    StringTokenizer tokenizer = new StringTokenizer(line, " ");
    int dim = tokenizer.countTokens() - 1;
    int clusterId = Integer.valueOf(tokenizer.nextToken());
    double[] coords = new double[dim];
    for (int i = 0; i < dim; i++) {
      coords[i] = Double.valueOf(tokenizer.nextToken());
    }
    Centroid cluster = new Centroid(clusterId, new Point(coords));
    writer.append(cluster, value);
  }
  IOUtils.closeStream(writer);
  reader.close();
}
private InputStream OpenMultiplePartsWithOffset(FileSystem fs, Path pt, long offset)
    throws IOException {
  RemoteIterator<LocatedFileStatus> rit = fs.listFiles(pt, false);
  Vector<FSDataInputStream> fileHandleList = new Vector<FSDataInputStream>();
  while (rit.hasNext()) {
    Path path = rit.next().getPath();
    String filename =
        path.toString().substring(path.getParent().toString().length(), path.toString().length());

    if (filename.startsWith("/part-")) {
      long filesize = fs.getFileStatus(path).getLen();
      if (offset < filesize) {
        FSDataInputStream handle = fs.open(path);
        if (offset > 0) {
          handle.seek(offset);
        }
        fileHandleList.add(handle);
      }
      offset -= filesize;
    }
  }
  if (fileHandleList.size() == 1) {
    return fileHandleList.get(0);
  } else if (fileHandleList.size() > 1) {
    Enumeration<FSDataInputStream> enu = fileHandleList.elements();
    return new SequenceInputStream(enu);
  } else {
    System.err.println("Error, no source file loaded. run genSeedDataset.sh first!");
    return null;
  }
}
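// Hedged usage sketch for OpenMultiplePartsWithOffset(): stream all part files
// under a hypothetical job output directory as one logical file, starting at
// offset 0 (a production caller should also handle the null return):
InputStream in = OpenMultiplePartsWithOffset(fs, new Path("/data/output"), 0L);
try (BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
  String line;
  while ((line = reader.readLine()) != null) {
    System.out.println(line);
  }
}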
/**
 * Reads a file line by line between two line numbers, de-duplicating the result;
 * line numbers start at 1, and -1 means unbounded.
 *
 * @param filePath the file path
 * @param beginIndex the first line number to keep
 * @param endIndex the last line number to keep
 * @return the de-duplicated lines
 */
public static List<String> readFileByLinesNoDup(String filePath, int beginIndex, int endIndex) {
  Set<String> set = new HashSet<String>();
  BufferedReader br = null;
  FSDataInputStream is = null;
  try {
    LOG.info("Reading the file contents one whole line at a time:");
    is = fs.open(new Path(filePath));
    br = new BufferedReader(new InputStreamReader(is), BUFFER_SIZE);
    String tempString = null;
    int lineindex = 0;
    if (endIndex == -1) {
      while ((tempString = br.readLine()) != null) {
        lineindex++;
        if (lineindex >= beginIndex) {
          set.add(tempString);
        }
      }
    } else {
      while ((tempString = br.readLine()) != null) {
        lineindex++;
        if ((lineindex >= beginIndex) && (lineindex <= endIndex)) {
          set.add(tempString);
        }
      }
    }
    if (is != null) {
      is.close();
    }
    if (br != null) {
      br.close();
    }
  } catch (IOException e) {
    LOG.error("Failed to read the file: " + e.getMessage());
    e.printStackTrace();
  }
  List<String> list = new ArrayList<String>(set.size());
  list.addAll(set);
  return list;
}
// to be used for testing
public WikipediaRecordReader(URL fileURL, long start, long end) throws IOException {
  this.start = start;
  this.end = end;
  Path path = new Path("file://", fileURL.getPath());
  fsin = FileSystem.getLocal(new Configuration()).open(path);
  // position the stream at the start of the split (the original seeked to
  // `start` and then immediately back to 0, making the first seek a no-op)
  fsin.seek(start);
}
public ColumnDefinitionFile(Path path) throws IOException {
  if (path == null) {
    throw new IOException("A column file path must be specified.");
  }
  FSDataInputStream fdis = path.getFileSystem(HadoopUtils.createConfiguration()).open(path);
  load(fdis);
  fdis.close();
}
/**
 * Retrieves the graph root.
 *
 * @return the graph root
 * @throws IOException
 */
public static String getGraphRoot() throws IOException {
  Configuration conf = setupConf();
  FileSystem fs = FileSystem.get(conf);
  FSDataInputStream in = fs.open(new Path(SPATH_OUTPUT + SPATH_ROOTFILE));
  String username = in.readUTF();
  in.close();
  return username;
}
private byte[] readFile(Path inputPath, long len) throws Exception {
  FSDataInputStream fsIn = fs.open(inputPath);
  // state data will not be that "long"
  byte[] data = new byte[(int) len];
  fsIn.readFully(data);
  fsIn.close();
  return data;
}
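// Hedged variant of readFile(): when the length is not known up front, it can be
// taken from the file status (fs is assumed to be an initialized FileSystem):
FileStatus status = fs.getFileStatus(inputPath);
byte[] data = new byte[(int) status.getLen()];
try (FSDataInputStream fsIn = fs.open(inputPath)) {
  fsIn.readFully(data); // throws EOFException if the file is shorter than data.length
}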
/*
 * Fetch a file that is in a Hadoop file system. Return a local File.
 * Interruptible.
 */
private File hdfsFetch(Path fromPath, Reporter reporter) throws IOException, InterruptedException {
  UUID uniqueId = UUID.randomUUID();
  File toFile = new File(tempDir, uniqueId.toString() + "/" + fromPath.getName());
  File toDir = new File(toFile.getParent());
  if (toDir.exists()) {
    FileUtils.deleteDirectory(toDir);
  }
  toDir.mkdirs();
  Path toPath = new Path(toFile.getCanonicalPath());

  FileSystem fS = fromPath.getFileSystem(hadoopConf);
  FileSystem tofS = FileSystem.getLocal(hadoopConf);

  Throttler throttler = new Throttler((double) bytesPerSecThrottle);
  try {
    for (FileStatus fStatus : fS.globStatus(fromPath)) {
      log.info("Copying " + fStatus.getPath() + " to " + toPath);
      long bytesSoFar = 0;

      FSDataInputStream iS = fS.open(fStatus.getPath());
      FSDataOutputStream oS = tofS.create(toPath);

      byte[] buffer = new byte[downloadBufferSize];

      int nRead;
      while ((nRead = iS.read(buffer, 0, buffer.length)) != -1) {
        // Needed to be able to be interrupted at any moment.
        if (Thread.interrupted()) {
          iS.close();
          oS.close();
          cleanDirNoExceptions(toDir);
          throw new InterruptedException();
        }
        bytesSoFar += nRead;
        oS.write(buffer, 0, nRead);
        throttler.incrementAndThrottle(nRead);
        // guard the progress calls consistently: reporter may be null, as the
        // check after the loop already assumed
        if (reporter != null && bytesSoFar >= bytesToReportProgress) {
          reporter.progress(bytesSoFar);
          bytesSoFar = 0L;
        }
      }
      if (reporter != null) {
        reporter.progress(bytesSoFar);
      }
      oS.close();
      iS.close();
    }
    return toDir;
  } catch (ClosedByInterruptException e) {
    // This can be thrown by the read() call.
    cleanDirNoExceptions(toDir);
    throw new InterruptedIOException();
  }
}