Example #1
 public static void createCentersSequenceFile(
     Configuration conf, FileSystem fs, String centroidsPath, String sequenceFilePath)
     throws Exception {
   Path seqFile = new Path(sequenceFilePath);
   // start from a clean sequence file
   if (fs.exists(seqFile)) {
     fs.delete(seqFile, true);
   }
   FSDataInputStream inputStream = fs.open(new Path(centroidsPath));
   SequenceFile.Writer writer =
       SequenceFile.createWriter(fs, conf, seqFile, Centroid.class, IntWritable.class);
   IntWritable value = new IntWritable(0);
   // each text line is "clusterId coord1 coord2 ..."; keep reading while bytes remain
   while (inputStream.available() > 0) {
     String line = inputStream.readLine();
     StringTokenizer tokenizer = new StringTokenizer(line, " ");
     int dim = tokenizer.countTokens() - 1;
     int clusterId = Integer.valueOf(tokenizer.nextToken());
     double[] coords = new double[dim];
     for (int i = 0; i < dim; i++) {
       coords[i] = Double.valueOf(tokenizer.nextToken());
     }
     Centroid cluster = new Centroid(clusterId, new Point(coords));
     writer.append(cluster, value);
   }
   IOUtils.closeStream(writer);
   inputStream.close();
 }
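A minimal invocation sketch for the method above; the Configuration/FileSystem setup and both paths are assumptions for illustration, not part of the original example:

 Configuration conf = new Configuration();
 FileSystem fs = FileSystem.get(conf);
 // hypothetical paths: a text file of "clusterId coord1 coord2 ..." lines in, a SequenceFile out
 createCentersSequenceFile(conf, fs, "/kmeans/centroids.txt", "/kmeans/centers.seq");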
Example #2
     @Override
    public boolean next(LongWritable key, ProtobufWritable<M> value) throws IOException {
      if (pos_ > end_ || fileIn_.available() <= 0) {
        return false;
      }
      key.set(pos_);

       // skip the marker bytes that precede each serialized message, if they still fit before the split end
       if (Protobufs.KNOWN_GOOD_POSITION_MARKER.length + pos_ <= end_)
         fileIn_.skipBytes(Protobufs.KNOWN_GOOD_POSITION_MARKER.length);

      pos_ = fileIn_.getPos();

      try {
        if (protoBuilder == null) {
          protoBuilder = Protobufs.getMessageBuilder(typeRef_.getRawClass());
        }

        Message.Builder builder = protoBuilder.clone();
        final boolean success = builder.mergeDelimitedFrom(fileIn_);
        if (success) {
          value.set((M) builder.build());
        }
        return success;
      } catch (InvalidProtocolBufferException e) {
        LOG.error(
            "Invalid Protobuf exception while building " + typeRef_.getRawClass().getName(), e);
      } catch (UninitializedMessageException ume) {
        LOG.error(
            "Uninitialized Message Exception while building " + typeRef_.getRawClass().getName(),
            ume);
      }
      return false;
    }
Example #3
  public static void main(String args[]) throws Exception {
    if (args.length != 2) {
      System.err.println("argumentos: dir-de-entrada arquivo-de-saida");
      System.exit(1);
    }

    FileSystem fs = FileSystem.get(confHadoop);
    Path inPath = new Path(args[0]);
    Path outPath = new Path(args[1] + "/dataset");
    FSDataInputStream in = null;
    SequenceFile.Writer writer = null;
    List<Path> files = listFiles(inPath, jpegFilter);
    try {
      writer = SequenceFile.createWriter(fs, confHadoop, outPath, Text.class, BytesWritable.class);
      for (Path p : files) {
        in = fs.open(p);
        byte buffer[] = new byte[in.available()];
        in.readFully(buffer);
        writer.append(new Text(p.getName()), new BytesWritable(buffer));
        in.close();
      }
    } finally {
      IOUtils.closeStream(writer);
    }
  }
Example #4
 private String retrieveLineSeparator(FSDataInputStream fis) throws IOException {
   char current;
   String lineSeparator = "";
   while (fis.available() > 0) {
     current = (char) fis.read();
     if ((current == '\n') || (current == '\r')) {
       lineSeparator += current;
       if (fis.available() > 0) {
         char next = (char) fis.read();
         if ((next == '\r') || (next == '\n')) {
           lineSeparator += next;
         }
       }
       return lineSeparator;
     }
   }
   return null;
 }
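A small, hypothetical usage sketch for the helper above (the file path and surrounding setup are assumptions; the call would live inside the class that declares the private method):

 FileSystem fs = FileSystem.get(new Configuration());
 try (FSDataInputStream in = fs.open(new Path("/data/sample.txt"))) {
   String sep = retrieveLineSeparator(in);
   // sep is "\n", "\r", "\r\n", "\n\r", or null when the file contains no line break
   System.out.println(sep == null ? "none" : sep.replace("\r", "\\r").replace("\n", "\\n"));
 }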
Example #5
 @Override
 public int available() throws IOException {
   try {
     return in.available();
   } catch (FileNotFoundException e) {
     return tryOpen().available();
   } catch (NullPointerException e) { // HDFS 1.x - DFSInputStream.getBlockAt()
     return tryOpen().available();
   } catch (AssertionError e) { // assert in HDFS 1.x - DFSInputStream.getBlockAt()
     return tryOpen().available();
   }
 }
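The retries above go through a tryOpen() helper that is not shown. A minimal sketch of what such a helper might do, assuming the wrapper keeps the FileSystem, the Path, and the last known read position in fields (all names here are assumptions):

  private FSDataInputStream tryOpen() throws IOException {
    // re-open the underlying file and resume at the position where the failed call left off
    in = fs.open(path);
    in.seek(pos);
    return in;
  }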
Example #6
 /**
  * Read the side file to get the last flush length.
  *
  * @param fs the file system to use
  * @param deltaFile the path of the delta file
  * @return the maximum size of the file to use
  * @throws IOException
  */
 private static long getLastFlushLength(FileSystem fs, Path deltaFile) throws IOException {
   Path lengths = OrcRecordUpdater.getSideFile(deltaFile);
   long result = Long.MAX_VALUE;
   try {
     FSDataInputStream stream = fs.open(lengths);
     result = -1;
     while (stream.available() > 0) {
       result = stream.readLong();
     }
     stream.close();
     return result;
   } catch (IOException ioe) {
     return result;
   }
 }
Example #7
  private void loadUnforwardedCounts() throws IOException {

    unforwardedCounts = new HashMap<Unforwarded, Long>();

    FSDataInputStream in = getHdfs().open(getUnforwardedCountsPath());

    // each record was written as a UTF string (the enum name) followed by a long count
    while (in.available() > 0) {

      String u = in.readUTF();

      Long count = in.readLong();

      unforwardedCounts.put(Unforwarded.valueOf(u), count);
    }

    in.close();
  }
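The reader above implies a writer that emits one writeUTF/writeLong pair per map entry. A hypothetical counterpart under that assumption (the method name and the overwrite flag are illustrative, not from the original source):

  private void saveUnforwardedCounts() throws IOException {
    FSDataOutputStream out = getHdfs().create(getUnforwardedCountsPath(), true);
    for (Map.Entry<Unforwarded, Long> e : unforwardedCounts.entrySet()) {
      out.writeUTF(e.getKey().name()); // read back with in.readUTF()
      out.writeLong(e.getValue());     // read back with in.readLong()
    }
    out.close();
  }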
Example #8
  // @Test
  // Check file/read write
  public void testFileIO() throws Exception {
    Path subDir1 = new Path("dir.1");
    Path file1 = new Path("dir.1/foo.1");

    kosmosFileSystem.mkdirs(baseDir);
    assertTrue(kosmosFileSystem.isDirectory(baseDir));
    kosmosFileSystem.setWorkingDirectory(baseDir);

    kosmosFileSystem.mkdirs(subDir1);

    FSDataOutputStream s1 =
        kosmosFileSystem.create(file1, true, 4096, (short) 1, (long) 4096, null);

    int bufsz = 4096;
    byte[] data = new byte[bufsz];

    for (int i = 0; i < data.length; i++) data[i] = (byte) (i % 16);

    // write 4 bytes and read them back; read API should return a byte per call
    s1.write(32);
    s1.write(32);
    s1.write(32);
    s1.write(32);
    // write some data
    s1.write(data, 0, data.length);
    // flush out the changes
    s1.close();

    // Read the stuff back and verify it is correct
    FSDataInputStream s2 = kosmosFileSystem.open(file1, 4096);
    int v;
    long nread = 0;

    v = s2.read();
    assertEquals(v, 32);
    v = s2.read();
    assertEquals(v, 32);
    v = s2.read();
    assertEquals(v, 32);
    v = s2.read();
    assertEquals(v, 32);

    assertEquals(s2.available(), data.length);

    byte[] buf = new byte[bufsz];
    s2.read(buf, 0, buf.length);
    nread = s2.getPos();

    for (int i = 0; i < data.length; i++) assertEquals(data[i], buf[i]);

    assertEquals(s2.available(), 0);

    s2.close();

    // append some data to the file
    try {
      s1 = kosmosFileSystem.append(file1);
      for (int i = 0; i < data.length; i++) data[i] = (byte) (i % 17);
      // write the data
      s1.write(data, 0, data.length);
      // flush out the changes
      s1.close();

      // read it back and validate
      s2 = kosmosFileSystem.open(file1, 4096);
      s2.seek(nread);
      s2.read(buf, 0, buf.length);
      for (int i = 0; i < data.length; i++) assertEquals(data[i], buf[i]);

      s2.close();
    } catch (Exception e) {
      System.out.println("append isn't supported by the underlying fs");
    }

    kosmosFileSystem.delete(file1, true);
    assertFalse(kosmosFileSystem.exists(file1));
    kosmosFileSystem.delete(subDir1, true);
    assertFalse(kosmosFileSystem.exists(subDir1));
    kosmosFileSystem.delete(baseDir, true);
    assertFalse(kosmosFileSystem.exists(baseDir));
  }
Example #9
    public void map(Text key, Text value, Context context)
        throws InterruptedException, IOException {

      String filename = key.toString();
      String json = value.toString();

      // Make sure the input is valid
      if (!(filename.isEmpty() || json.isEmpty())) {

        // Change the json-type feature to Mat-type feature
        Mat descriptor = json2mat(json);
        if (descriptor != null) {
          // Read the query feature from the cache in Hadoop
          Mat query_features;
          String pathStr = context.getConfiguration().get("featureFilePath");
          FileSystem fs = FileSystem.get(context.getConfiguration());
          FSDataInputStream fsDataInputStream = fs.open(new Path(pathStr));
          StringBuilder sb = new StringBuilder();

          // Use a buffer to read the query_feature
          int remain = fsDataInputStream.available();
          while (remain > 0) {
            int read;
            byte[] buf = new byte[BUF_SIZE];
            read = fsDataInputStream.read(buf, fsDataInputStream.available() - remain, BUF_SIZE);
            sb.append(new String(buf, 0, read, StandardCharsets.UTF_8));
            remain = remain - read;
            System.out.println("remain:" + remain + "\tread:" + read + "\tsb.size:" + sb.length());
          }

          // Read the query_feature line by line
          //                    Scanner sc = new Scanner(fsDataInputStream, "UTF-8");
          //                    StringBuilder sb = new StringBuilder();
          //                    while (sc.hasNextLine()) {
          //                        sb.append(sc.nextLine());
          //                    }
          //                    String query_json = sb.toString();
          //                    String query_json = new String(buf, StandardCharsets.UTF_8);

          String query_json = sb.toString();
          fsDataInputStream.close();
          query_features = json2mat(query_json);

          // Get the similarity of the current database image against the query image
          DescriptorMatcher matcher = DescriptorMatcher.create(DescriptorMatcher.FLANNBASED);
          MatOfDMatch matches = new MatOfDMatch();

          // Ensure the two features have same length of cols (the feature extracted are all 128
          // cols(at least in this case))
          if (query_features.cols() == descriptor.cols()) {

            matcher.match(query_features, descriptor, matches);
            DMatch[] dMatches = matches.toArray();

            // Calculate the max/min distances
            //                    double max_dist = Double.MAX_VALUE;
            //                    double min_dist = Double.MIN_VALUE;
            double max_dist = 0;
            double min_dist = 100;
            for (int i = 0; i < dMatches.length; i++) {
              double dist = dMatches[i].distance;
              if (min_dist > dist) min_dist = dist;
              if (max_dist < dist) max_dist = dist;
            }
            // Only distances ≤ threshold are good matches
            double threshold = max_dist * THRESHOLD_FACTOR;
            //                    double threshold = min_dist * 2;
            LinkedList<DMatch> goodMatches = new LinkedList<DMatch>();

            for (int i = 0; i < dMatches.length; i++) {
              if (dMatches[i].distance <= threshold) {
                goodMatches.addLast(dMatches[i]);
              }
            }

            // Get the ratio of good_matches to all_matches
            double ratio = (double) goodMatches.size() / (double) dMatches.length;

            System.out.println("*** current_record_filename:" + filename + " ***");
            System.out.println("feature:" + descriptor + "\nquery_feature:" + query_features);
            System.out.println(
                "min_dist of keypoints:" + min_dist + "  max_dist of keypoints:" + max_dist);
            System.out.println(
                "total_matches:" + dMatches.length + "\tgood_matches:" + goodMatches.size());
            //                    System.out.println("type:" + descriptor.type() + " channels:" +
            // descriptor.channels() + " rows:" + descriptor.rows() + " cols:" + descriptor.cols());
            //                    System.out.println("qtype:" + query_features.type() + "
            // qchannels:" + query_features.channels() + " qrows:" + query_features.rows() + "
            // qcols:" + query_features.cols());
            System.out.println();

            if (ratio > PERCENTAGE_THRESHOLD) {
              // Key:1        Value:filename|ratio
              context.write(ONE, new Text(filename + "|" + ratio));
              //                        context.write(ONE, new Text(filename + "|" +
              // String.valueOf(goodMatches.size())));
            }
          } else {
            System.out.println("The size of the features are not equal");
          }
        } else {
          // a null pointer, do nothing
          System.out.println("A broken/null feature:" + filename);
          System.out.println();
        }
      }
    }
Example #10
 @Override
 public int available() throws IOException {
   return fsDataInputStream.available();
 }
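Example #10 is a plain delegating override; for context, a minimal wrapper class it could sit in might look like this sketch (the class name and constructor are assumptions, not the original code):

  class DelegatingInputStream extends InputStream {
    private final FSDataInputStream fsDataInputStream;

    DelegatingInputStream(FSDataInputStream in) {
      this.fsDataInputStream = in;
    }

    @Override
    public int read() throws IOException {
      return fsDataInputStream.read();
    }

    @Override
    public int available() throws IOException {
      return fsDataInputStream.available();
    }
  }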
Example #11
  public void testFileIO() throws Exception {

    Path subDir1 = new Path("tfio_dir.1");
    Path file1 = new Path("tfio_dir.1/foo.1");
    Path baseDir = new Path("tfio_testDirs1");

    fs.mkdirs(baseDir);
    assertTrue(fs.isDirectory(baseDir));
    // fs.setWorkingDirectory(baseDir);

    fs.mkdirs(subDir1);

    FSDataOutputStream s1 = fs.create(file1, true, 4096, (short) 1, (long) 4096, null);

    int bufsz = 4096;
    byte[] data = new byte[bufsz];

    for (int i = 0; i < data.length; i++) data[i] = (byte) (i % 16);

    // write 4 bytes and read them back; read API should return a byte per
    // call
    s1.write(32);
    s1.write(32);
    s1.write(32);
    s1.write(32);
    // write some data
    s1.write(data, 0, data.length);
    // flush out the changes
    s1.close();

    // Read the stuff back and verify it is correct
    FSDataInputStream s2 = fs.open(file1, 4096);
    int v;

    v = s2.read();
    assertEquals(v, 32);
    v = s2.read();
    assertEquals(v, 32);
    v = s2.read();
    assertEquals(v, 32);
    v = s2.read();
    assertEquals(v, 32);

    assertEquals(s2.available(), data.length);

    byte[] buf = new byte[bufsz];
    s2.read(buf, 0, buf.length);
    for (int i = 0; i < data.length; i++) assertEquals(data[i], buf[i]);

    assertEquals(s2.available(), 0);

    s2.close();

    fs.delete(file1, true);
    assertFalse(fs.exists(file1));
    fs.delete(subDir1, true);
    assertFalse(fs.exists(subDir1));
    fs.delete(baseDir, true);
    assertFalse(fs.exists(baseDir));

    fs.delete(subDir1);
    fs.delete(file1);
    fs.delete(baseDir);
  }
Example #12
  // in rare circumstances, two logs can be left uncapped (lacking a commit at the end signifying
  // that all the content in the log was committed)
  @Test
  public void testRecoveryMultipleLogs() throws Exception {
    try {
      DirectUpdateHandler2.commitOnClose = false;
      final Semaphore logReplay = new Semaphore(0);
      final Semaphore logReplayFinish = new Semaphore(0);

      UpdateLog.testing_logReplayHook =
          new Runnable() {
            @Override
            public void run() {
              try {
                assertTrue(logReplay.tryAcquire(timeout, TimeUnit.SECONDS));
              } catch (Exception e) {
                throw new RuntimeException(e);
              }
            }
          };

      UpdateLog.testing_logReplayFinishHook =
          new Runnable() {
            @Override
            public void run() {
              logReplayFinish.release();
            }
          };

      String logDir = h.getCore().getUpdateHandler().getUpdateLog().getLogDir();

      clearIndex();
      assertU(commit());

      assertU(adoc("id", "AAAAAA"));
      assertU(adoc("id", "BBBBBB"));
      assertU(adoc("id", "CCCCCC"));

      h.close();
      String[] files = HdfsUpdateLog.getLogList(fs, new Path(logDir));
      Arrays.sort(files);
      String fname = files[files.length - 1];

      FSDataOutputStream dos = fs.append(new Path(logDir, files[files.length - 1]));
      dos.writeLong(0xffffffffffffffffL);
      dos.writeChars(
          "This should be appended to a good log file, representing a bad partially written record.");
      dos.close();

      FSDataInputStream dis = fs.open(new Path(logDir, files[files.length - 1]));
      byte[] content = new byte[(int) dis.available()];

      dis.readFully(content);

      dis.close();

      // Now make a newer log file with just the IDs changed.  NOTE: this may not work if log format
      // changes too much!
      findReplace(
          "AAAAAA".getBytes(StandardCharsets.UTF_8),
          "aaaaaa".getBytes(StandardCharsets.UTF_8),
          content);
      findReplace(
          "BBBBBB".getBytes(StandardCharsets.UTF_8),
          "bbbbbb".getBytes(StandardCharsets.UTF_8),
          content);
      findReplace(
          "CCCCCC".getBytes(StandardCharsets.UTF_8),
          "cccccc".getBytes(StandardCharsets.UTF_8),
          content);

      // WARNING... assumes format of .00000n where n is less than 9
      long logNumber = Long.parseLong(fname.substring(fname.lastIndexOf(".") + 1));
      String fname2 =
          String.format(
              Locale.ROOT, UpdateLog.LOG_FILENAME_PATTERN, UpdateLog.TLOG_NAME, logNumber + 1);

      dos = fs.create(new Path(logDir, fname2), (short) 1);
      dos.write(content);
      dos.close();

      logReplay.release(1000);
      logReplayFinish.drainPermits();
      ignoreException(
          "OutOfBoundsException"); // this is what the corrupted log currently produces... subject
      // to change.
      createCore();
      assertTrue(logReplayFinish.tryAcquire(timeout, TimeUnit.SECONDS));
      resetExceptionIgnores();
      assertJQ(req("q", "*:*"), "/response/numFound==6");

    } finally {
      DirectUpdateHandler2.commitOnClose = true;
      UpdateLog.testing_logReplayHook = null;
      UpdateLog.testing_logReplayFinishHook = null;
    }
  }