Esempio n. 1
0
  /**
   * Reads a SequenceFile of {@code Text} keys and {@code VectorWritable} values and converts each
   * record into a {@code Messreihe}.
   *
   * <p>For every record the vector elements are copied into a new {@code Messreihe}. If the
   * {@code tst} field is non-null the series is handed to {@code tst.processReihe(fw, mr, fwe)};
   * exceptions thrown there are logged and do not abort the load. If {@code inMEM} is set the
   * series is also appended to {@code bucketData}. At most {@code LIMIT} records are read.
   *
   * <p>When {@code useHDFS} is set, the Hadoop site files under {@code /etc/hadoop/conf} are added
   * to the configuration before the file system is obtained.
   *
   * @param fn path of the SequenceFile to read
   * @param ids optional list of record ids; may be {@code null}. NOTE(review): this list is
   *     currently not used to filter anything - every record is processed regardless of whether
   *     its id is contained in {@code ids}. Confirm whether filtering was intended.
   * @throws IOException if the file cannot be opened or a record cannot be read
   */
  public void loadFromLocalFS(String fn, java.util.Vector<Integer> ids) throws IOException {

    Configuration config = new Configuration();

    if (useHDFS) {
      System.out.println(">>HDFS<< :: " + fn);
      // Add the Hadoop core / HDFS site configuration files.
      config.addResource(new Path("/etc/hadoop/conf/core-site.xml"));
      config.addResource(new Path("/etc/hadoop/conf/hdfs-site.xml"));
    } else {
      System.out.println("<<LocalFS>> :: " + fn);
    }

    FileSystem fs = FileSystem.get(config);
    Path path = new Path(fn);

    // Open the SequenceFile for reading; the reader is closed in the finally block below.
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, config);
    try {
      System.out.println("--> process bucket    : " + fn);
      System.out.println("--> compression-codes : " + reader.getCompressionCodec());
      System.out.println("--> key-classename    : " + reader.getKeyClassName());
      System.out.println("--> value-classname   : " + reader.getValueClassName());

      TSBucket.setRecordMode("counter");

      // i is the 1-based number of the record about to be read.
      int i = 1;
      while (i <= LIMIT) {

        Text key = new Text();
        VectorWritable vec = new VectorWritable();

        // next() returns false at end of file; stop here so that no empty
        // "phantom" record is processed and counted.
        if (!reader.next(key)) {
          break;
        }
        reader.getCurrentValue(vec);

        int pageID = TSBucket.getRecordID(key.toString());

        Messreihe mr = new Messreihe();
        mr.setDescription(i + " ) " + fn + "_[" + key.toString() + "]");
        mr.setLabel(pageID + " ");

        // Copy all vector elements into the series, in index order.
        NamedVector vector = (NamedVector) vec.get();
        int size = vector.size();
        for (int c = 0; c < size; c++) {
          mr.addValue(vector.get(c));
        }

        try {
          if (tst != null) {
            tst.processReihe(fw, mr, fwe);
          }
        } catch (Exception ex) {
          // Best effort: a failing record is logged and loading continues.
          Logger.getLogger(TSBucket.class.getName()).log(Level.SEVERE, null, ex);
        }

        if (inMEM) {
          bucketData.add(mr);
        }

        i = i + 1;
      }
      System.out.println("--> nr of records     : " + (i - 1));
    } finally {
      reader.close();
    }
  }