Example 1
 private void close() throws IOException {
   for (SequenceFile.Writer writer : writers.values()) {
     writer.close();
   }
   writers.clear();
   LOG.info("closed writer");
 }
Example 2
  private static void createControlFile(
      FileSystem fs,
      int fileSize, // in MB
      int nrFiles)
      throws IOException {
    LOG.info("creating control file: " + fileSize + " mega bytes, " + nrFiles + " files");

    fs.delete(CONTROL_DIR, true);

    for (int i = 0; i < nrFiles; i++) {
      String name = getFileName(i);
      Path controlFile = new Path(CONTROL_DIR, "in_file_" + name);
      SequenceFile.Writer writer = null;
      try {
        writer =
            SequenceFile.createWriter(
                fs, fsConfig, controlFile, Text.class, LongWritable.class, CompressionType.NONE);
        writer.append(new Text(name), new LongWritable(fileSize));
      } catch (Exception e) {
        throw new IOException(e.getLocalizedMessage());
      } finally {
        if (writer != null) writer.close();
        writer = null;
      }
    }
    LOG.info("created control files for: " + nrFiles + " files");
  }
  public static void createControlFile(FileSystem fs, long megaBytes, int numFiles, long seed)
      throws Exception {

    LOG.info("creating control file: " + megaBytes + " bytes, " + numFiles + " files");

    Path controlFile = new Path(CONTROL_DIR, "files");
    fs.delete(controlFile, true);
    Random random = new Random(seed);

    SequenceFile.Writer writer =
        SequenceFile.createWriter(
            fs, conf, controlFile, Text.class, LongWritable.class, CompressionType.NONE);

    long totalSize = 0;
    long maxSize = ((megaBytes / numFiles) * 2) + 1;
    try {
      while (totalSize < megaBytes) {
        Text name = new Text(Long.toString(random.nextLong()));

        long size = random.nextLong();
        if (size < 0) size = -size;
        size = size % maxSize;

        // LOG.info(" adding: name="+name+" size="+size);

        writer.append(name, new LongWritable(size));

        totalSize += size;
      }
    } finally {
      writer.close();
    }
    LOG.info("created control file for: " + totalSize + " bytes");
  }
  @Override
  public boolean writeData(String uri, byte[] data) {
    /* Delete the parent folder if it exists. */

    File f = new File(uri);
    if (f.getName().equals(storageConfiguration.getProperty("postfix"))) {
      f = f.getParentFile();
      Path file = new Path(String.valueOf(f));
      try {
        if (fileSystem.exists(file)) {
          fileSystem.delete(file, true);
        }
      } catch (IOException e) {
        e.printStackTrace();
      }
    }

    SequenceFile.Writer writer = getWriterFor(uri);
    HDFSByteChunk byteChunk = new HDFSByteChunk(data, uri);
    try {
      writer.append(new IntWritable(0), byteChunk);
      writer.close();
    } catch (IOException e) {
      e.printStackTrace();
      return false;
    }
    return true;
  }
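Several of these snippets (writeData above and the write overloads further down) call a getWriterFor(uri) helper whose body is not included here. A minimal sketch of one plausible shape, assuming a per-URI cache like the writers map that close() in Example 1 clears; the field names and writer parameters below are assumptions, not from the original source:

  // Hypothetical sketch (not from the original source): lazily create and cache one writer per URI.
  private final Map<String, SequenceFile.Writer> writers = new HashMap<>();

  private SequenceFile.Writer getWriterFor(String uri) {
    SequenceFile.Writer writer = writers.get(uri);
    if (writer == null) {
      try {
        writer =
            SequenceFile.createWriter(
                fileSystem, fileSystem.getConf(), new Path(uri),
                IntWritable.class, HDFSByteChunk.class);
      } catch (IOException e) {
        throw new RuntimeException("Could not create SequenceFile writer for " + uri, e);
      }
      writers.put(uri, writer);
    }
    return writer;
  }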
Example 5
  /**
   * Create a data file in SequenceFile format that gets exported to the db.
   *
   * @param fileNum the number of the file (for multi-file export).
   * @param numRecords how many records to write to the file.
   * @param className the table class name to instantiate and populate for each record.
   */
  private void createSequenceFile(int fileNum, int numRecords, String className)
      throws IOException {

    try {
      // Instantiate the value record object via reflection.
      Class cls = Class.forName(className, true, Thread.currentThread().getContextClassLoader());
      SqoopRecord record = (SqoopRecord) ReflectionUtils.newInstance(cls, new Configuration());

      // Create the SequenceFile.
      Configuration conf = new Configuration();
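      // Note: "fs.default.name" is the legacy configuration key; newer Hadoop releases prefer "fs.defaultFS".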
      conf.set("fs.default.name", "file:///");
      FileSystem fs = FileSystem.get(conf);
      Path tablePath = getTablePath();
      Path filePath = new Path(tablePath, "part" + fileNum);
      fs.mkdirs(tablePath);
      SequenceFile.Writer w =
          SequenceFile.createWriter(fs, conf, filePath, LongWritable.class, cls);

      // Now write the data.
      int startId = fileNum * numRecords;
      for (int i = 0; i < numRecords; i++) {
        record.parse(getRecordLine(startId + i));
        w.append(new LongWritable(startId + i), record);
      }

      w.close();
    } catch (ClassNotFoundException cnfe) {
      throw new IOException(cnfe);
    } catch (RecordParser.ParseError pe) {
      throw new IOException(pe);
    }
  }
  @Override
  public boolean write(String uri, InputStream stream) {
    SequenceFile.Writer writer = getWriterFor(uri);

    try {
      int size = stream.available();
      byte[] bytes = new byte[size];
      int readBytes = stream.read(bytes);
      if (readBytes != size) {
        log.error(
            "Could not read all the bytes from the inputStream. Read "
                + readBytes
                + " instead of "
                + size);
        return false;
      }
      HDFSByteChunk byteChunk = new HDFSByteChunk(bytes, uri);
      writer.append(new IntWritable(0), byteChunk);
      writer.close();
      return true;
    } catch (IOException e) {
      e.printStackTrace();
      return false;
    }
  }
  @Override
  public boolean write(String uri, List<InputStream> streams) {
    boolean result = false;
    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
    try {
      for (InputStream stream : streams) {

        int size = stream.available();
        byte[] bytes = new byte[size];
        int readBytes = stream.read(bytes);
        if (readBytes != size) {
          log.error(
              "Could not read all the bytes from the inputStream. Read "
                  + readBytes
                  + " instead of "
                  + size);
          return false;
        }
        outputStream.write(bytes);
      }
      HDFSByteChunk byteChunk = new HDFSByteChunk(outputStream.toByteArray(), uri);
      SequenceFile.Writer writer = getWriterFor(uri);
      writer.append(new IntWritable(0), byteChunk);
      writer.close();
      result = true;
    } catch (IOException e) {
      e.printStackTrace();
    }
    return result;
  }
  @SuppressWarnings("deprecation")
  private void createControlFile(
      FileSystem fs,
      long nrBytes, // in bytes
      int nrFiles)
      throws IOException {
    LOG.info("creating control file: " + nrBytes + " bytes, " + nrFiles + " files");

    Path controlDir = getControlDir(config);
    fs.delete(controlDir, true);

    for (int i = 0; i < nrFiles; i++) {
      String name = getFileName(i);
      Path controlFile = new Path(controlDir, "in_file_" + name);
      SequenceFile.Writer writer = null;
      try {
        writer =
            SequenceFile.createWriter(
                fs, config, controlFile, Text.class, LongWritable.class, CompressionType.NONE);
        writer.append(new Text(name), new LongWritable(nrBytes));
      } catch (Exception e) {
        throw new IOException(e.getLocalizedMessage());
      } finally {
        if (writer != null) writer.close();
        writer = null;
      }
    }
    LOG.info("created control files for: " + nrFiles + " files");
  }
  private void writeSeqenceFileTest(
      FileSystem fs, int rowCount, Path file, int columnNum, CompressionCodec codec)
      throws IOException {

    byte[][] columnRandom;

    resetRandomGenerators();

    BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnNum);
    columnRandom = new byte[columnNum][];
    for (int i = 0; i < columnNum; i++) {
      BytesRefWritable cu = new BytesRefWritable();
      bytes.set(i, cu);
    }

    // zero length key is not allowed by block compress writer, so we use a byte
    // writable
    ByteWritable key = new ByteWritable();
    SequenceFile.Writer seqWriter =
        SequenceFile.createWriter(
            fs,
            conf,
            file,
            ByteWritable.class,
            BytesRefArrayWritable.class,
            CompressionType.BLOCK,
            codec);

    for (int i = 0; i < rowCount; i++) {
      nextRandomRow(columnRandom, bytes);
      seqWriter.append(key, bytes);
    }
    seqWriter.close();
  }
  /**
   * Write out a SequenceFile that can be read by TotalOrderPartitioner that contains the split
   * points in startKeys.
   *
   * <p>This method was copied from HFileOutputFormat in hbase-0.90.1-cdh3u0. I had to copy it
   * because it's private.
   *
   * @param conf The job configuration.
   * @param partitionsPath output path for SequenceFile.
   * @param startKeys the region start keys to use as the partitions.
   * @throws IOException If there is an error.
   */
  private static void writePartitionFile(
      Configuration conf, Path partitionsPath, List<HFileKeyValue> startKeys) throws IOException {
    if (startKeys.isEmpty()) {
      throw new IllegalArgumentException("No regions passed");
    }

    // We're generating a list of split points, and we don't ever
    // have keys < the first region (which has an empty start key)
    // so we need to remove it. Otherwise we would end up with an
    // empty reducer with index 0.
    TreeSet<HFileKeyValue> sorted = new TreeSet<HFileKeyValue>();
    sorted.addAll(startKeys);

    HFileKeyValue first = sorted.first();
    if (0 != first.getRowKey().length) {
      throw new IllegalArgumentException(
          "First region of table should have empty start row key. Instead has: "
              + Bytes.toStringBinary(first.getRowKey()));
    }
    sorted.remove(first);

    // Write the actual file
    final SequenceFile.Writer writer =
        KijiMRPlatformBridge.get()
            .newSeqFileWriter(conf, partitionsPath, HFileKeyValue.class, NullWritable.class);

    try {
      for (HFileKeyValue startKey : sorted) {
        writer.append(startKey, NullWritable.get());
      }
    } finally {
      writer.close();
    }
  }
  /** Creates the input file (containing the names of the files to be fixed). */
  private List<String> createInputFile(
      String jobName, Path inDir, Map<String, Integer> corruptFilePriority, int priority)
      throws IOException {
    Path file = new Path(inDir, jobName + IN_FILE_SUFFIX);
    FileSystem fs = file.getFileSystem(getConf());
    SequenceFile.Writer fileOut =
        SequenceFile.createWriter(fs, getConf(), file, LongWritable.class, Text.class);
    long index = 0L;

    List<String> filesAdded = new ArrayList<String>();
    int count = 0;
    final long max = filesPerTask * BLOCKFIX_TASKS_PER_JOB;
    for (Map.Entry<String, Integer> entry : corruptFilePriority.entrySet()) {
      if (entry.getValue() != priority) {
        continue;
      }
      if (count >= max) {
        break;
      }
      String corruptFileName = entry.getKey();
      fileOut.append(new LongWritable(index++), new Text(corruptFileName));
      filesAdded.add(corruptFileName);
      count++;

      if (index % filesPerTask == 0) {
        fileOut.sync(); // create sync point to make sure we can split here
      }
    }

    fileOut.close();
    return filesAdded;
  }
Example 12
 /**
  * Write a partition file for the given job, using the Sampler provided. Queries the sampler for a
  * sample keyset, sorts by the output key comparator, selects the keys for each rank, and writes
  * to the destination returned from {@link TotalOrderPartitioner#getPartitionFile}.
  */
 @SuppressWarnings("unchecked") // getInputFormat, getOutputKeyComparator
 public static <K, V> void writePartitionFile(Job job, Sampler<K, V> sampler)
     throws IOException, ClassNotFoundException, InterruptedException {
   Configuration conf = job.getConfiguration();
   final InputFormat inf = ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
   int numPartitions = job.getNumReduceTasks();
   K[] samples = sampler.getSample(inf, job);
   RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
   Arrays.sort(samples, comparator);
   Path dst = new Path(TotalOrderPartitioner.getPartitionFile(conf));
   FileSystem fs = dst.getFileSystem(conf);
   if (fs.exists(dst)) {
     fs.delete(dst, false);
   }
   SequenceFile.Writer writer =
       SequenceFile.createWriter(fs, conf, dst, job.getMapOutputKeyClass(), NullWritable.class);
   NullWritable nullValue = NullWritable.get();
   float stepSize = samples.length / (float) numPartitions;
   int last = -1;
   for (int i = 1; i < numPartitions; ++i) {
     int k = Math.round(stepSize * i);
     while (last >= k && comparator.compare(samples[last], samples[k]) == 0) {
       ++k;
     }
     writer.append(samples[k], nullValue);
     last = k;
   }
   writer.close();
 }
  public static void writeCanopyCenters(Configuration conf, ArrayList<StockVector> canopyCenters)
      throws IOException {
    FileSystem fs = FileSystem.get(conf);

    IntWritable IntKey = new IntWritable(1);
    Path canopyFileName = new Path(Nasdaq.CANOPY_SEQ_FILE_PATH);

    System.out.println("before seq file");
    // create file
    @SuppressWarnings("deprecation")
    final SequenceFile.Writer writer =
        SequenceFile.createWriter(fs, conf, canopyFileName, StockVector.class, IntWritable.class);

    System.out.println("after seq file");
    System.out.println("canopies" + canopyCenters.size());
    for (StockVector canopyCenter : canopyCenters) {
      // write canopy to file
      writer.append(canopyCenter, IntKey);
      System.out.println("sum " + canopyCenter.GetSum());
    }
    System.out.println("canopies end" + canopyCenters.size());
    // close writer and file system
    writer.close();
    // fs.close();
  }
Example 14
  /**
   * set up input file which has the list of input files.
   *
   * @return boolean
   * @throws IOException
   */
  private boolean setup() throws IOException {
    estimateSavings();

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobconf);
    Path jobdir = new Path(jClient.getSystemDir(), NAME + "_" + randomId);

    LOG.info(JOB_DIR_LABEL + "=" + jobdir);
    jobconf.set(JOB_DIR_LABEL, jobdir.toString());
    Path log = new Path(jobdir, "_logs");

    // The control file should have small size blocks. This helps
    // in spreading out the load from mappers that will be spawned.
    jobconf.setInt("dfs.blocks.size", OP_LIST_BLOCK_SIZE);

    FileOutputFormat.setOutputPath(jobconf, log);
    LOG.info("log=" + log);

    // create operation list
    FileSystem fs = jobdir.getFileSystem(jobconf);
    Path opList = new Path(jobdir, "_" + OP_LIST_LABEL);
    jobconf.set(OP_LIST_LABEL, opList.toString());
    int opCount = 0, synCount = 0;
    SequenceFile.Writer opWriter = null;

    try {
      opWriter =
          SequenceFile.createWriter(
              fs, jobconf, opList, Text.class, PolicyInfo.class, SequenceFile.CompressionType.NONE);
      for (RaidPolicyPathPair p : raidPolicyPathPairList) {
        // If a large set of files are Raided for the first time, files
        // in the same directory that tend to have the same size will end up
        // with the same map. This shuffle mixes things up, allowing a better
        // mix of files.
        java.util.Collections.shuffle(p.srcPaths);
        for (FileStatus st : p.srcPaths) {
          opWriter.append(new Text(st.getPath().toString()), p.policy);
          opCount++;
          if (++synCount > SYNC_FILE_MAX) {
            opWriter.sync();
            synCount = 0;
          }
        }
      }

    } finally {
      if (opWriter != null) {
        opWriter.close();
      }
      fs.setReplication(opList, OP_LIST_REPLICATION); // increase replication for control file
    }
    raidPolicyPathPairList.clear();

    jobconf.setInt(OP_COUNT_LABEL, opCount);
    LOG.info("Number of files=" + opCount);
    jobconf.setNumMapTasks(
        getMapCount(opCount, new JobClient(jobconf).getClusterStatus().getTaskTrackers()));
    LOG.info("jobName= " + jobName + " numMapTasks=" + jobconf.getNumMapTasks());
    return opCount != 0;
  }
Example 15
    @SuppressWarnings("deprecation")
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
      super.cleanup(context);
      List<Cluster> newKMeansClusters = new ArrayList<Cluster>();
      List<Cluster> newCanopyClusters = new ArrayList<Cluster>();

      for (Cluster kMeansCluster : _clusters.keySet()) {
        Cluster canopyCluster = _kMeansToCanopyMap.get(kMeansCluster);

        // Set a new Cluster center
        Vector center = new Vector();
        center.setElements(new double[kMeansCluster.getCenterVector().getElements().length]);
        List<Vector> vectors = new ArrayList<Vector>();

        for (Vector currentVector : _clusters.get(kMeansCluster)) {
          vectors.add(new Vector(currentVector));

          // Sums the vectors to a new vector in order to find the one that is the closest to all
          // others, it will be our new cluster center.
          for (int i = 0; i < currentVector.getElements().length; i++)
            center.getElements()[i] += currentVector.getElements()[i];
        }

        // Divides the vector's elements in order to find its real location (it will be a fictive
        // vector)
        for (int i = 0; i < center.getElements().length; i++)
          center.getElements()[i] = center.getElements()[i] / vectors.size();

        Cluster newKMeansCluster = new Cluster(center);
        canopyCluster.setIsCovered(newKMeansCluster.isConvergedWithOtherCluster(kMeansCluster));
        newKMeansClusters.add(newKMeansCluster);
        newCanopyClusters.add(canopyCluster);

        // Adding the vectors to the new cluster center
        for (Vector vector : vectors) {
          context.write(newKMeansCluster, vector);
        }
      }

      Configuration conf = context.getConfiguration();
      Path outPath = new Path(conf.get("centers.path"));
      FileSystem fs = FileSystem.get(conf);

      if (fs.exists(outPath)) fs.delete(outPath, true);

      SequenceFile.Writer writer =
          SequenceFile.createWriter(
              fs, context.getConfiguration(), outPath, Cluster.class, Cluster.class);
      context.getCounter(Counter.CONVERGED).setValue(0);

      for (int i = 0; i < newKMeansClusters.size(); i++) {
        writer.append(newCanopyClusters.get(i), newKMeansClusters.get(i));

        if (newCanopyClusters.get(i).getIsCovered())
          context.getCounter(Counter.CONVERGED).increment(1);
      }

      writer.close();
    }
 private static <T extends WritableComparable> Path writePartitionFile(
     String testname, JobConf conf, T[] splits) throws IOException {
   final FileSystem fs = FileSystem.getLocal(conf);
   final Path testdir = new Path(System.getProperty("test.build.data", "/tmp")).makeQualified(fs);
   Path p = new Path(testdir, testname + "/_partition.lst");
   TotalOrderPartitioner.setPartitionFile(conf, p);
   conf.setNumReduceTasks(splits.length + 1);
   SequenceFile.Writer w = null;
   try {
     NullWritable nw = NullWritable.get();
     w =
         SequenceFile.createWriter(
             fs,
             conf,
             p,
             splits[0].getClass(),
             NullWritable.class,
             SequenceFile.CompressionType.NONE);
     for (int i = 0; i < splits.length; ++i) {
       w.append(splits[i], NullWritable.get());
     }
   } finally {
     if (null != w) w.close();
   }
   return p;
 }
  private static Path saveVector(Configuration conf, Path path, Vector v) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    SequenceFile.Writer writer =
        new SequenceFile.Writer(fs, conf, path, IntWritable.class, VectorWritable.class);

    try {
      writer.append(new IntWritable(0), new VectorWritable(v));
    } finally {
      writer.close();
    }
    return path;
  }
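The saveVector helper above (like several later snippets) instantiates SequenceFile.Writer directly, a constructor that newer Hadoop releases mark deprecated. A minimal equivalent sketch using the option-based createWriter overload, the same form shown later in Example 25, reusing the conf and path values from the method above:

    // Hedged alternative (not from the original source): the option-based factory variant.
    SequenceFile.Writer writer =
        SequenceFile.createWriter(
            conf,
            SequenceFile.Writer.file(path),
            SequenceFile.Writer.keyClass(IntWritable.class),
            SequenceFile.Writer.valueClass(VectorWritable.class));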
Example 18
 /** Reduce task done, write output to a file. */
 @Override
 public void close() throws IOException {
   // write output to a file
   Path outDir = new Path(TMP_DIR, "out");
   Path outFile = new Path(outDir, "reduce-out");
   FileSystem fileSys = FileSystem.get(conf);
   SequenceFile.Writer writer =
       SequenceFile.createWriter(
           fileSys, conf, outFile, LongWritable.class, LongWritable.class, CompressionType.NONE);
   writer.append(new LongWritable(numInside), new LongWritable(numOutside));
   writer.close();
 }
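For context, a hedged companion sketch (not part of the original example) of how the single record written to reduce-out above could be read back, reusing fileSys, conf, and outFile from the method:

  // Hypothetical sketch: read the (numInside, numOutside) pair back from reduce-out.
  SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, outFile, conf);
  try {
    LongWritable numInside = new LongWritable();
    LongWritable numOutside = new LongWritable();
    reader.next(numInside, numOutside);
  } finally {
    reader.close();
  }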
Example 19
  public void testInputFormat() {

    try {
      JobConf conf = new JobConf();
      String TMP_DIR = System.getProperty("test.build.data", "/tmp");
      Path filename = new Path("file:///" + TMP_DIR + "/tmpSeqFile");
      SequenceFile.Writer sfw =
          SequenceFile.createWriter(
              FileSystem.getLocal(conf),
              conf,
              filename,
              ChukwaArchiveKey.class,
              ChunkImpl.class,
              SequenceFile.CompressionType.NONE,
              Reporter.NULL);

      StringBuilder buf = new StringBuilder();
      int offsets[] = new int[lines.length];
      for (int i = 0; i < lines.length; ++i) {
        buf.append(lines[i]);
        buf.append("\n");
        offsets[i] = buf.length() - 1;
      }
      ChukwaArchiveKey key = new ChukwaArchiveKey(0, "datatype", "sname", 0);
      ChunkImpl val = new ChunkImpl("datatype", "sname", 0, buf.toString().getBytes(), null);
      val.setRecordOffsets(offsets);
      sfw.append(key, val);
      sfw.append(key, val); // write it twice
      sfw.close();

      long len = FileSystem.getLocal(conf).getFileStatus(filename).getLen();
      InputSplit split = new FileSplit(filename, 0, len, (String[]) null);
      ChukwaInputFormat in = new ChukwaInputFormat();
      RecordReader<LongWritable, Text> r = in.getRecordReader(split, conf, Reporter.NULL);

      LongWritable l = r.createKey();
      Text line = r.createValue();
      for (int i = 0; i < lines.length * 2; ++i) {
        boolean succeeded = r.next(l, line);
        assertTrue(succeeded);
        assertEquals(i, l.get());
        assertEquals(lines[i % lines.length], line.toString());
        System.out.println("read line: " + l.get() + " " + line);
      }
      boolean succeeded = r.next(l, line);
      assertFalse(succeeded);

    } catch (IOException e) {
      e.printStackTrace();
      fail("IO exception " + e);
    }
  }
  public static void writeClustersToFile(
      FileSystem fs, Configuration conf, int k, List<Vector> points, Path path) throws IOException {

    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class, Kluster.class);
    for (int i = 0; i < k; i++) {
      Vector vec = points.get(i);
      Kluster cluster = new Kluster(vec, i, new EuclideanDistanceMeasure());
      writer.append(new Text(cluster.getIdentifier()), cluster);
    }
    writer.close();

    //		SequenceFileDumper.main(new String[] { "--input", inClusterFile.toString() });
  }
Example 21
 /** Reduce task done, write output to a file. */
 @Override
 public void cleanup(Context context) throws IOException {
   // write output to a file
   Configuration conf = context.getConfiguration();
   Path outDir = new Path(conf.get(FileOutputFormat.OUTDIR));
   Path outFile = new Path(outDir, "reduce-out");
   FileSystem fileSys = FileSystem.get(conf);
   SequenceFile.Writer writer =
       SequenceFile.createWriter(
           fileSys, conf, outFile, LongWritable.class, LongWritable.class, CompressionType.NONE);
   writer.append(new LongWritable(numInside), new LongWritable(numOutside));
   writer.close();
 }
Example 22
  private void generateTestData() {
    try {
      SequenceFile.Writer writer =
          SequenceFile.createWriter(fs, conf, new Path(INPUT), LongWritable.class, Text.class);

      for (int i = 0; i < input.length; i++) {
        writer.append(new LongWritable(i), new Text(input[i]));
      }

      writer.close();
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
Example 23
 @Override
 protected void cleanup(Context context) throws IOException, InterruptedException {
   super.cleanup(context);
   Configuration conf = context.getConfiguration();
   Path outPath = new Path(conf.get(CENTERS_CONF_KEY));
   FileSystem fs = FileSystem.get(conf);
   // fs.delete(outPath, true);
   SequenceFile.Writer writer =
       SequenceFile.createWriter(
           fs, context.getConfiguration(), outPath, Centroid.class, IntWritable.class);
   final IntWritable mockValue = new IntWritable(0);
   for (Centroid center : centers) {
     writer.append(center, mockValue);
   }
   writer.close();
 }
  public static void writePointsToFile(
      FileSystem fs, Configuration conf, List<Vector> points, Path path) throws IOException {

    SequenceFile.Writer writer =
        new SequenceFile.Writer(fs, conf, path, LongWritable.class, VectorWritable.class);
    long recNum = 0;
    VectorWritable vec = new VectorWritable();
    for (Vector point : points) {
      vec.set(point);
      writer.append(new LongWritable(recNum++), vec);
    }
    writer.close();

    //		VectorDumper.main(new String[] { "--input", inPointFile.toString() });
    //		SequenceFileDumper.main(new String[] { "--input", inPointFile.toString() });
  }
Example 25
  @Test
  public void testReadString() throws Exception {
    if (SKIP) {
      return;
    }

    //        final Path file = new Path("hdfs://localhost:9000/tmp/test/test-hdfs-file");
    final Path file =
        new Path(new File("../../../../target/test/test-camel-string").getAbsolutePath());
    org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
    // now set classes for filesystems. This is normally done using java.util.ServiceLoader which
    // doesn't
    // work inside OSGi.
    conf.setClass("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class, FileSystem.class);
    conf.setClass(
        "fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class, FileSystem.class);
    SequenceFile.Writer writer =
        SequenceFile.createWriter(
            conf,
            SequenceFile.Writer.file(file),
            SequenceFile.Writer.keyClass(NullWritable.class),
            SequenceFile.Writer.valueClass(Text.class));
    NullWritable keyWritable = NullWritable.get();
    Text valueWritable = new Text();
    String value = "CIAO!";
    valueWritable.set(value);
    writer.append(keyWritable, valueWritable);
    writer.sync();
    writer.close();

    context.addRoutes(
        new RouteBuilder() {
          public void configure() {
            //
            // from("hdfs2://localhost:9000/tmp/test/test-hdfs-file?fileSystemType=HDFS&fileType=SEQUENCE_FILE&initialDelay=0").to("mock:result");
            from("hdfs2:///"
                    + file.toUri()
                    + "?fileSystemType=LOCAL&fileType=SEQUENCE_FILE&initialDelay=0")
                .to("mock:result");
          }
        });
    context.start();

    MockEndpoint resultEndpoint = context.getEndpoint("mock:result", MockEndpoint.class);
    resultEndpoint.expectedMessageCount(1);
    resultEndpoint.assertIsSatisfied();
  }
 public static void writeVectorsToFile(
     FileSystem fs, Configuration conf, List<Vector> vectors, Path path) throws IOException {
   SequenceFile.Writer writer =
       new SequenceFile.Writer(fs, conf, path, Text.class, VectorWritable.class);
   VectorWritable vec = new VectorWritable();
   for (Vector vector : vectors) {
     vec.set(vector);
     if (NamedVector.class.isAssignableFrom(vector.getClass())) {
       writer.append(new Text(((NamedVector) vector).getName()), vec);
     } else {
       writer.append(new Text(vector.toString()), vec);
     }
   }
   writer.close();
   //		VectorDumper.main(new String[] { "--input", inPointFile.toString() });
   //		SequenceFileDumper.main(new String[] { "--input", inPointFile.toString() });
 }
  /** {@inheritDoc} */
  @Override
  public void doBuildListing(Path pathToListingFile, DistCpOptions options) throws IOException {

    SequenceFile.Writer fileListWriter = null;

    try {
      fileListWriter = getWriter(pathToListingFile);

      for (Path path : options.getSourcePaths()) {
        FileSystem sourceFS = path.getFileSystem(getConf());
        path = makeQualified(path);

        FileStatus rootStatus = sourceFS.getFileStatus(path);
        Path sourcePathRoot = computeSourceRootPath(rootStatus, options);
        boolean localFile = (rootStatus.getClass() != FileStatus.class);

        FileStatus[] sourceFiles = sourceFS.listStatus(path);
        if (sourceFiles != null && sourceFiles.length > 0) {
          for (FileStatus sourceStatus : sourceFiles) {
            if (LOG.isDebugEnabled()) {
              LOG.debug("Recording source-path: " + sourceStatus.getPath() + " for copy.");
            }
            writeToFileListing(fileListWriter, sourceStatus, sourcePathRoot, localFile);

            if (isDirectoryAndNotEmpty(sourceFS, sourceStatus)) {
              if (LOG.isDebugEnabled()) {
                LOG.debug("Traversing non-empty source dir: " + sourceStatus.getPath());
              }
              traverseNonEmptyDirectory(fileListWriter, sourceStatus, sourcePathRoot, localFile);
            }
          }
        } else {
          writeToFileListing(fileListWriter, rootStatus, sourcePathRoot, localFile);
        }
      }
    } finally {
      try {
        if (fileListWriter != null) fileListWriter.close();
      } catch (IOException exception) {
        LOG.error("Could not close output-steam to the file-list: ", exception);
        throw exception;
      }
    }
  }
Example 28
  public void createBucketWithRandomTS(String s, int EXP, int ANZ) throws IOException {

    DecimalFormat df = new DecimalFormat("0.000");

    System.out.println("--> create bucket : uncorrelated TS alpha=0.5");

    Configuration config = new Configuration();
    FileSystem fs = FileSystem.get(config);

    Path path = new Path(outputDir + "/" + s + "_alpha_0.5_.tsb.vec.seq");
    System.out.println("--> create bucket : " + path.toString());

    // write a SequenceFile from a Vector
    SequenceFile.Writer writer =
        new SequenceFile.Writer(fs, config, path, Text.class, VectorWritable.class);

    System.out.println(
        "--> process bucket : Uniform-Random-Generator ( z="
            + ANZ
            + ", l="
            + Math.pow(2, EXP)
            + ", TestA )");

    int SAMPLES = 0;
    for (int i = 0; i < ANZ; i++) {

      TSData data = new TSData();
      data.dataset = processTESTA(data.getRandomData((int) Math.pow(2, EXP)));
      Messreihe mr = data.getMessreihe();
      if (SAMPLES < TSPropertyTester.zSAMPLES) TSPropertyTester.addSample(mr);
      SAMPLES++;
      /** Here we lose the METADATA of each row!!! */
      System.out.print("  (" + i + ")");
      NamedVector nv = new NamedVector(new DenseVector(data.getData()), data.label);
      VectorWritable vec = new VectorWritable();
      vec.set(nv);

      writer.append(new Text(nv.getName()), vec);
    }

    writer.close();
    System.out.println("### DONE : " + path.toString());
  }
Example 29
  /**
   * Create LRC time series ...
   *
   * @param s
   * @param z
   * @param EXP
   * @param BETA
   * @throws IOException
   * @throws Exception
   */
  public void createBucketWithRandomTS(String s, int z, int EXP, double BETA)
      throws IOException, Exception {

    DecimalFormat df = new DecimalFormat("0.000");

    System.out.println("--> create bucket : LRC with beta=" + df.format(BETA));

    Configuration config = new Configuration();
    FileSystem fs = FileSystem.get(config);

    Path path = new Path(outputDir + "/" + s + "_LRC_beta_" + df.format(BETA) + ".tsb.vec.seq");
    System.out.println("--> create bucket : " + path.toString());

    // write a SequenceFile from a Vector
    SequenceFile.Writer writer =
        new SequenceFile.Writer(fs, config, path, Text.class, VectorWritable.class);

    System.out.println("--> process bucket : LRC-Generator (" + z + ")");

    int SAMPLES = 0;
    for (int i = 0; i < z; i++) {

      boolean showTESTS = false;

      Messreihe mr =
          LongTermCorrelationSeriesGenerator.getRandomRow(
              (int) Math.pow(2, EXP), BETA, showTESTS, false);
      if (SAMPLES < TSPropertyTester.zSAMPLES) TSPropertyTester.addSample(mr);
      SAMPLES++;

      TSData data = TSData.convertMessreihe(mr);

      System.out.println("(" + i + ")");
      NamedVector nv = new NamedVector(new DenseVector(data.getData()), data.label);
      VectorWritable vec = new VectorWritable();
      vec.set(nv);

      writer.append(new Text(nv.getName()), vec);
    }

    writer.close();
    System.out.println("### DONE : " + path.toString());
  }
Example 30
  /**
   * In sourceFolder, a complete group is selected and converted into a TS bucket.
   *
   * <p>==> it is only a SAVE function ...
   *
   * @param groupFolder
   */
  public void createBucketFromLocalFilesInDirectory(String groupFolder, int limit)
      throws IOException {

    LIMIT = limit;
    String s = groupFolder;

    File f = new File(sourcFolder + s);
    System.out.println("--> load data : " + f.getAbsolutePath());

    Configuration config = new Configuration();
    FileSystem fs = FileSystem.get(config);

    Path path = new Path(outputDir + "/" + s + ".tsb.vec.seq");
    System.out.println("--> create bucket : " + path.toString());

    // write a SequenceFile from a Vector
    SequenceFile.Writer writer =
        new SequenceFile.Writer(fs, config, path, Text.class, VectorWritable.class);

    File[] liste = f.listFiles(new AccessFileFilter());
    System.out.println(liste.length);
    System.out.println("--> process bucket : " + f.getAbsolutePath() + " (" + liste.length + ")");

    int c = 0;
    for (File file : liste) {
      c++;
      if (c < LIMIT) {
        TSData data = new TSData(file);
        System.out.println("(" + c + ")");
        NamedVector nv = new NamedVector(new DenseVector(data.getData()), data.label);
        VectorWritable vec = new VectorWritable();
        vec.set(nv);

        writer.append(new Text(nv.getName()), vec);
      }
      if (c % 10000 == 0) {
        System.out.println(c);
      }
    }

    writer.close();
  }