示例#1
0
 /**
  * Copy FileSystem files to local files.
  *
  * <p>A source directory is copied recursively; the results of the recursive calls are not
  * inspected.
  *
  * @param srcFS file system that holds {@code src}
  * @param src file or directory to copy
  * @param dst local destination; for a directory source it is created with {@code mkdirs()}
  * @param deleteSource whether to delete {@code src} recursively once it has been copied
  * @param conf configuration handed to {@code IOUtils.copyBytes}
  * @return {@code false} if {@code dst.mkdirs()} reports failure; otherwise the result of
  *     deleting the source when {@code deleteSource} is set, or {@code true}
  * @throws IOException if {@code src} is neither a directory nor a file, or an I/O call fails
  */
 public static boolean copy(
     FileSystem srcFS, Path src, File dst, boolean deleteSource, Configuration conf)
     throws IOException {
   if (srcFS.getFileStatus(src).isDir()) {
     if (!dst.mkdirs()) {
       return false;
     }
     FileStatus contents[] = srcFS.listStatus(src);
     for (int i = 0; i < contents.length; i++) {
       copy(
           srcFS,
           contents[i].getPath(),
           new File(dst, contents[i].getPath().getName()),
           deleteSource,
           conf);
     }
   } else if (srcFS.isFile(src)) {
     InputStream in = null;
     FileOutputStream out = null;
     try {
       in = srcFS.open(src);
       // Opening the local file can fail after the source stream is already open; the catch
       // below makes sure the source stream is released in that case as well.
       out = new FileOutputStream(dst);
       IOUtils.copyBytes(in, out, conf);
     } catch (IOException e) {
       IOUtils.closeStream(out);
       IOUtils.closeStream(in);
       throw e;
     }
   } else {
     throw new IOException(src.toString() + ": No such file or directory");
   }
   if (deleteSource) {
     return srcFS.delete(src, true);
   } else {
     return true;
   }
 }
  /**
   * Round-trips {@code MyClass} values through MessagePack and Base64 and runs
   * {@code checkFormat} on files of varying length.
   *
   * <p>For every randomly stepped length below {@code MAX_LENGTH} the test file is rewritten
   * with that many Base64-encoded entries (written back to back) and {@code checkFormat(job)} is
   * invoked.
   */
  public void testFormat() throws Exception {
    localFs = FileSystem.getLocal(defaultConf);
    localFs.delete(workDir, true);

    Job job = new Job(new Configuration(defaultConf));
    Path file = new Path(workDir, "test.txt");

    // Seed a second generator from a single draw of an unseeded one.
    Random stepSource = new Random(new Random().nextInt());

    int length = 0;
    while (length < MAX_LENGTH) {
      // create a file with length entries
      Writer out = new OutputStreamWriter(localFs.create(file));
      try {
        MyClass sample = new MyClass();
        for (int n = 0; n < length; n++) {
          sample.s = Integer.toString(n);
          sample.v = n;

          byte[] packed = MessagePack.pack(sample);
          byte[] encoded = base64_.encodeBase64(packed);
          byte[] decoded = base64_.decode(encoded);
          MyClass restored = MessagePack.unpack(decoded, sample.getClass());
          assertEquals(sample.s, restored.s);
          assertEquals(sample.v, restored.v);

          out.write(base64_.encodeToString(packed));
        }
      } finally {
        out.close();
      }
      checkFormat(job);

      // advance to the next length
      length += stepSource.nextInt(MAX_LENGTH / 10) + 1;
    }
  }
示例#3
0
  /**
   * Configures and runs the word-count job.
   *
   * <p>Arguments:
   *
   * <ul>
   *   <li>{@code args[0]} - base path; input is read from
   *       {@code args[0] + "data/plot_summaries.txt"}
   *   <li>{@code args[1]} - output location, appended to {@code args[0]}; an existing output
   *       path is deleted before the job starts
   *   <li>{@code args[2]} (optional) - number of reduce tasks; defaults to 1
   * </ul>
   *
   * <p>Exits the JVM with status 0 if the job completes successfully and 1 otherwise.
   */
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    job.setJarByClass(WordCount.class);
    job.setMapperClass(CountMapper.class);
    job.setReducerClass(CountReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(NullWritable.class);

    // Honor the optional reducer count; previously it was parsed and then unconditionally
    // overwritten with 1.
    int numReduceTasks = args.length > 2 ? Integer.parseInt(args[2]) : 1;
    job.setNumReduceTasks(numReduceTasks);

    FileInputFormat.addInputPath(job, new Path(args[0] + "data/plot_summaries.txt"));
    FileSystem fs = FileSystem.get(conf);
    // handle (e.g. delete) existing output path
    Path outputDestination = new Path(args[0] + args[1]);
    if (fs.exists(outputDestination)) {
      fs.delete(outputDestination, true);
    }

    // set output path & start job1
    FileOutputFormat.setOutputPath(job, outputDestination);
    int jobCompletionStatus = job.waitForCompletion(true) ? 0 : 1;
    // Report the outcome to the caller; the status used to be computed and dropped.
    System.exit(jobCompletionStatus);
  }
示例#4
0
  /**
   * Copy local files to a FileSystem.
   *
   * <p>The destination is first resolved through {@code checkDest} (without overwrite). A local
   * directory is copied entry by entry; the results of those nested copies are not inspected.
   *
   * @return {@code false} if the destination directory could not be created; otherwise the
   *     result of {@code FileUtil.fullyDelete(src)} when {@code deleteSource} is set, or
   *     {@code true}
   * @throws IOException if {@code src} is neither a directory nor a file, or an I/O call fails
   */
  public static boolean copy(
      File src, FileSystem dstFS, Path dst, boolean deleteSource, Configuration conf)
      throws IOException {
    dst = checkDest(src.getName(), dstFS, dst, false);

    if (src.isDirectory()) {
      if (!dstFS.mkdirs(dst)) {
        return false;
      }
      for (File child : listFiles(src)) {
        copy(child, dstFS, new Path(dst, child.getName()), deleteSource, conf);
      }
    } else if (src.isFile()) {
      InputStream localIn = null;
      OutputStream remoteOut = null;
      try {
        localIn = new FileInputStream(src);
        remoteOut = dstFS.create(dst);
        IOUtils.copyBytes(localIn, remoteOut, conf);
      } catch (IOException failure) {
        // release whatever was opened before passing the failure on
        IOUtils.closeStream(remoteOut);
        IOUtils.closeStream(localIn);
        throw failure;
      }
    } else {
      throw new IOException(src.toString() + ": No such file or directory");
    }

    return !deleteSource || FileUtil.fullyDelete(src);
  }
示例#5
0
  /**
   * Opens the {@code part-*} files directly under {@code pt} as one logical stream that starts
   * {@code offset} bytes into their concatenation.
   *
   * <p>Files are visited in the order returned by {@code fs.listFiles}. Files that lie entirely
   * before {@code offset} are skipped, the file containing {@code offset} is opened and seeked,
   * and every later file is opened from its beginning.
   *
   * @param fs file system to read from
   * @param pt directory containing the part files
   * @param offset byte position within the concatenated part files
   * @return the single open stream, a {@code SequenceInputStream} over several, or {@code null}
   *     (after printing an error) when no part file was opened
   * @throws IOException if listing, opening or seeking fails; streams opened so far are closed
   */
  private InputStream OpenMultiplePartsWithOffset(FileSystem fs, Path pt, long offset)
      throws IOException {
    RemoteIterator<LocatedFileStatus> rit = fs.listFiles(pt, false);
    Vector<FSDataInputStream> fileHandleList = new Vector<FSDataInputStream>();
    try {
      while (rit.hasNext()) {
        Path path = rit.next().getPath();
        // Compare the final path component directly; slicing the parent's string off the
        // front breaks when the parent string already ends with '/' (e.g. a root directory).
        if (!path.getName().startsWith("part-")) {
          continue;
        }

        long filesize = fs.getFileStatus(path).getLen();
        if (offset < filesize) {
          FSDataInputStream handle = fs.open(path);
          // register before seeking so the handle is cleaned up if seek fails
          fileHandleList.add(handle);
          if (offset > 0) {
            handle.seek(offset);
          }
        }
        // once this goes negative every remaining part is read from its start
        offset -= filesize;
      }
    } catch (IOException e) {
      // do not leak the handles that were already opened
      for (FSDataInputStream opened : fileHandleList) {
        try {
          opened.close();
        } catch (IOException ignored) {
          // best effort; the original failure is the one reported
        }
      }
      throw e;
    }

    if (fileHandleList.size() == 1) return fileHandleList.get(0);
    else if (fileHandleList.size() > 1) {
      Enumeration<FSDataInputStream> enu = fileHandleList.elements();
      return new SequenceInputStream(enu);
    } else {
      System.err.println("Error, no source file loaded. run genSeedDataset.sh fisrt!");
      return null;
    }
  }
示例#6
0
  /**
   * Injects the URLs found under {@code urlDir} into the crawl db at {@code crawlDb}.
   *
   * <p>Two jobs run one after the other: the first maps the text input through
   * {@code InjectMapper} into a temporary sequence-file directory, the second merges that
   * directory into the crawl db using {@code InjectReducer}. The temporary directory is deleted
   * afterwards.
   *
   * @param crawlDb crawl db the injected entries are merged into
   * @param urlDir input path of the first job
   * @throws IOException propagated from the jobs or the file-system calls
   */
  public void inject(Path crawlDb, Path urlDir) throws IOException {
    SimpleDateFormat timestampFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    long startedAt = System.currentTimeMillis();
    if (LOG.isInfoEnabled()) {
      LOG.info("Injector: starting at " + timestampFormat.format(startedAt));
      LOG.info("Injector: crawlDb: " + crawlDb);
      LOG.info("Injector: urlDir: " + urlDir);
    }

    // scratch directory with a random, non-negative suffix
    String tempParent = getConf().get("mapred.temp.dir", ".");
    int tempSuffix = new Random().nextInt(Integer.MAX_VALUE);
    Path scratchDir = new Path(tempParent + "/inject-temp-" + Integer.toString(tempSuffix));

    // step 1: map text input file to a <url,CrawlDatum> file
    if (LOG.isInfoEnabled()) {
      LOG.info("Injector: Converting injected urls to crawl db entries.");
    }
    JobConf convertJob = new NutchJob(getConf());
    convertJob.setJobName("inject " + urlDir);
    FileInputFormat.addInputPath(convertJob, urlDir);
    convertJob.setMapperClass(InjectMapper.class);

    FileOutputFormat.setOutputPath(convertJob, scratchDir);
    convertJob.setOutputFormat(SequenceFileOutputFormat.class);
    convertJob.setOutputKeyClass(Text.class);
    convertJob.setOutputValueClass(CrawlDatum.class);
    convertJob.setLong("injector.current.time", System.currentTimeMillis());
    RunningJob finishedConvert = JobClient.runJob(convertJob);

    long injectedCount =
        finishedConvert.getCounters().findCounter("injector", "urls_injected").getValue();
    long filteredCount =
        finishedConvert.getCounters().findCounter("injector", "urls_filtered").getValue();
    LOG.info("Injector: total number of urls rejected by filters: " + filteredCount);
    LOG.info(
        "Injector: total number of urls injected after normalization and filtering: "
            + injectedCount);

    // step 2: merge with existing crawl db
    if (LOG.isInfoEnabled()) {
      LOG.info("Injector: Merging injected urls into crawl db.");
    }
    JobConf mergeJob = CrawlDb.createJob(getConf(), crawlDb);
    FileInputFormat.addInputPath(mergeJob, scratchDir);
    mergeJob.setReducerClass(InjectReducer.class);
    JobClient.runJob(mergeJob);
    CrawlDb.install(mergeJob, crawlDb);

    // step 3: clean up the scratch directory
    FileSystem.get(getConf()).delete(scratchDir, true);

    long finishedAt = System.currentTimeMillis();
    LOG.info(
        "Injector: finished at "
            + timestampFormat.format(finishedAt)
            + ", elapsed: "
            + TimingUtil.elapsedTime(startedAt, finishedAt));
  }
示例#7
0
  // Copies the cache file or archive to the local file system, unpacks it when it is a jar, zip
  // or tar archive, and then chmods the localized directory.
  //
  // Side effects on cacheStatus: size is set from FileUtil.getDU of the local parent directory
  // and mtime is set from getTimestamp(conf, cache).
  //
  // Returns cacheStatus.localizedLoadPath. Throws IOException when the local parent directory
  // cannot be created or one of the file-system calls fails.
  // NOTE(review): confFileStamp is not read anywhere in this method.
  private static Path localizeCache(
      Configuration conf, URI cache, long confFileStamp, CacheStatus cacheStatus, boolean isArchive)
      throws IOException {
    FileSystem fs = getFileSystem(cache, conf);
    FileSystem localFs = FileSystem.getLocal(conf);
    Path parchive = null;

    // For an archive the local copy goes one level below localizedLoadPath, under the same name,
    // so that localizedLoadPath itself becomes the directory the archive is unpacked into.
    if (isArchive) {
      parchive =
          new Path(
              cacheStatus.localizedLoadPath, new Path(cacheStatus.localizedLoadPath.getName()));
    } else {
      parchive = cacheStatus.localizedLoadPath;
    }
    // NOTE(review): the directory created is parchive.getParent(), while the error message
    // reports localizedLoadPath; these differ for non-archives.
    if (!localFs.mkdirs(parchive.getParent())) {
      throw new IOException(
          "Mkdirs failed to create directory " + cacheStatus.localizedLoadPath.toString());
    }
    String cacheId = cache.getPath();

    fs.copyToLocalFile(new Path(cacheId), parchive);
    if (isArchive) {
      // the archive type is decided by the lower-cased local path
      String tmpArchive = parchive.toString().toLowerCase();
      File srcFile = new File(parchive.toString());
      File destDir = new File(parchive.getParent().toString());
      if (tmpArchive.endsWith(".jar")) {
        RunJar.unJar(srcFile, destDir);
      } else if (tmpArchive.endsWith(".zip")) {
        FileUtil.unZip(srcFile, destDir);
      } else if (isTarFile(tmpArchive)) {
        FileUtil.unTar(srcFile, destDir);
      }
      // any other extension: nothing is unpacked,
      // the file stays in the directory as it was copied
    }
    long cacheSize = FileUtil.getDU(new File(parchive.getParent().toString()));
    cacheStatus.size = cacheSize;
    addCacheInfoUpdate(cacheStatus);

    // do chmod here
    try {
      // Setting recursive permission to grant everyone read and execute
      Path localDir = new Path(cacheStatus.localizedBaseDir, cacheStatus.uniqueParentDir);
      LOG.info("Doing chmod on localdir :" + localDir);
      FileUtil.chmod(localDir.toString(), "ugo+rx", true);
    } catch (InterruptedException e) {
      // NOTE(review): the interruption is only logged; the thread's interrupt flag is not
      // restored here.
      LOG.warn("Exception in chmod" + e.toString());
    }

    // update cacheStatus to reflect the newly cached file
    cacheStatus.mtime = getTimestamp(conf, cache);
    return cacheStatus.localizedLoadPath;
  }
示例#8
0
    /**
     * Loads the three query keywords {@code k0}, {@code k1} and {@code k2} from the first line
     * of {@code /user/yao/query/query}, which is expected to be comma separated.
     *
     * @throws IOException if the file cannot be read, is empty, or its first line holds fewer
     *     than three comma-separated values
     */
    protected void setup(Context context) throws IOException, InterruptedException {
      super.setup(context);

      Path pt = new Path("/user/yao/query/query");
      FileSystem fs = FileSystem.get(new Configuration());
      BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(pt)));
      try {
        String line = br.readLine();
        if (line == null) {
          // previously surfaced as a NullPointerException on line.split
          throw new IOException("Query file " + pt + " is empty");
        }
        String[] keywords = line.split(",");
        if (keywords.length < 3) {
          // previously surfaced as an ArrayIndexOutOfBoundsException
          throw new IOException(
              "Query file " + pt + " must contain 3 comma-separated keywords but has "
                  + keywords.length);
        }
        k0 = keywords[0];
        k1 = keywords[1];
        k2 = keywords[2];
      } finally {
        // release the stream even when reading or validation fails
        br.close();
      }
    }
 /**
  * Opens an FSDataInputStream at the indicated Path.
  *
  * <p>With checksum verification enabled the data is read through a
  * {@code ChecksumFSInputChecker} bound to this file system; otherwise the file is opened
  * directly on the raw file system.
  *
  * @param f the file name to open
  * @param bufferSize the size of the buffer to be used.
  */
 @Override
 public FSDataInputStream open(Path f, int bufferSize) throws IOException {
   if (verifyChecksum) {
     FileSystem self = this;
     InputStream checked = new ChecksumFSInputChecker(this, f, bufferSize);
     return new FSDataBoundedInputStream(self, f, checked);
   }

   FileSystem raw = getRawFileSystem();
   InputStream unchecked = raw.open(f, bufferSize);
   return new FSDataBoundedInputStream(raw, f, unchecked);
 }
示例#10
0
 /**
  * Resolves the path a copy should actually write to.
  *
  * <p>A destination that does not exist is returned unchanged. If it is an existing directory,
  * {@code srcName} is appended and the check is repeated once for the resulting path (this time
  * a directory is an error). An existing file is accepted only when {@code overwrite} is set.
  *
  * @throws IOException if the resolved target is a directory, or exists and {@code overwrite}
  *     is {@code false}
  */
 private static Path checkDest(String srcName, FileSystem dstFS, Path dst, boolean overwrite)
     throws IOException {
   if (!dstFS.exists(dst)) {
     return dst;
   }

   FileStatus existing = dstFS.getFileStatus(dst);
   if (existing.isDir()) {
     if (srcName == null) {
       throw new IOException("Target " + dst + " is a directory");
     }
     // retry inside the directory; passing null prevents a second level of nesting
     return checkDest(null, dstFS, new Path(dst, srcName), overwrite);
   }

   if (!overwrite) {
     throw new IOException("Target " + dst + " already exists");
   }
   return dst;
 }
示例#11
0
  /**
   * Loads the cluster centers for this task.
   *
   * <p>Reads {@code numberOfCenters} and {@code centersReadDirectory} from the job
   * configuration, then reads the first record of each sequence file
   * {@code <centersReadDirectory>/centers/<index>} and appends its value to {@code centers}.
   *
   * <p>An {@link IOException} is reported on standard output and otherwise ignored, so
   * {@code centers} may end up with fewer entries than {@code numberOfCenters}.
   *
   * @param conf job configuration providing the two settings above
   */
  public void configure(JobConf conf) {
    numberOfCenters = Integer.valueOf(conf.get("numberOfCenters"));
    centersDirectory = conf.get("centersReadDirectory");

    try {
      Configuration c = new Configuration();
      FileSystem fs = FileSystem.get(c);

      for (int index = 0; index < numberOfCenters; ++index) {
        SequenceFile.Reader reader =
            new SequenceFile.Reader(fs, new Path(centersDirectory + "/centers/" + index), c);
        try {
          LongWritable key = new LongWritable();
          Point value = new Point();

          // NOTE(review): the return value of next() is not checked, so a file without
          // records contributes a freshly constructed Point.
          reader.next(key, value);

          centers.add(value);
        } finally {
          // close the reader even when reading the record fails
          reader.close();
        }
      }
    } catch (IOException e) {
      // best effort: report and carry on
      System.out.println("well, damn.");
      e.printStackTrace();
    }
  }
  /**
   * Writes task output, aborts the task and then the job, and checks that the task's temporary
   * file, the job's temporary directory and any content of the output directory are gone.
   */
  public void testAbort() throws IOException {
    JobConf conf = new JobConf();
    setConfForFileOutputCommitter(conf);
    JobContext jobContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext taskContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();
    FileOutputFormat.setWorkOutputPath(conf, committer.getTempTaskOutputPath(taskContext));

    // do setup
    committer.setupJob(jobContext);
    committer.setupTask(taskContext);
    String fileName = "test.txt";

    // A reporter that does nothing
    Reporter nullReporter = Reporter.NULL;

    // write output
    FileSystem localFs = FileSystem.getLocal(conf);
    TextOutputFormat format = new TextOutputFormat();
    RecordWriter recordWriter = format.getRecordWriter(localFs, conf, fileName, nullReporter);
    writeOutput(recordWriter, nullReporter);

    // abort the task: its temporary output file must disappear
    committer.abortTask(taskContext);
    Path taskOutput = new Path(committer.getTempTaskOutputPath(taskContext), fileName);
    File taskOutputFile = new File(taskOutput.toString());
    assertFalse("task temp dir still exists", taskOutputFile.exists());

    // abort the job: the temporary directory must disappear and the output dir must be empty
    committer.abortJob(jobContext, JobStatus.State.FAILED);
    Path jobTemp = new Path(outDir, FileOutputCommitter.TEMP_DIR_NAME);
    File jobTempDir = new File(jobTemp.toString());
    assertFalse("job temp dir still exists", jobTempDir.exists());

    File outputRoot = new File(outDir.toString());
    assertEquals("Output directory not empty", 0, outputRoot.listFiles().length);
    FileUtil.fullyDelete(new File(outDir.toString()));
  }
示例#13
0
  /**
   * Copies {@code insert} into the archive at {@code apkPath}, replacing an existing entry.
   *
   * <p>The entry path inside the archive is the absolute path of {@code insert} with every
   * occurrence of {@code location} removed.
   *
   * @param apkPath archive opened as a file system
   * @param zip_properties not used by this method
   * @param insert file on disk to copy into the archive
   * @param method not used by this method
   * @param location path removed from {@code insert}'s absolute path to obtain the entry path
   * @throws IOException if the archive cannot be opened, written or closed
   */
  private void insertFile(
      Path apkPath, Map<String, String> zip_properties, File insert, String method, Path location)
      throws AndrolibException, IOException {
    // ZipFileSystem only writes at .close(); try-with-resources performs that close, so no
    // explicit close call is needed.
    // http://mail.openjdk.java.net/pipermail/nio-dev/2012-July/001764.html
    //
    // The cast selects the (Path, ClassLoader) overload; a bare null is ambiguous on JDKs that
    // also offer newFileSystem(Path, Map).
    try (FileSystem fs = FileSystems.newFileSystem(apkPath, (ClassLoader) null)) {
      Path root = fs.getPath("/");

      // in order to get the path relative to the zip, we strip off the absolute path, minus what we
      // already have in the zip. thus /var/files/apktool/apk/unknown/folder/file => /folder/file
      Path dest =
          fs.getPath(root.toString(), insert.getAbsolutePath().replace(location.toString(), ""));
      Path newFile = Paths.get(insert.getAbsolutePath());
      Files.copy(newFile, dest, StandardCopyOption.REPLACE_EXISTING);
    }
  }
示例#14
0
  /**
   * Uploads the local file {@code args[0]} to the file-system location {@code args[1]}.
   *
   * <p>Prints {@code success} when the copy completes; any exception is caught and reported as
   * {@code fail} followed by the exception's string form.
   */
  public static void main(String[] args) throws Exception {
    try {
      final String source = args[0];
      final String target = args[1];
      InputStream localInput = new BufferedInputStream(new FileInputStream(source));

      Configuration conf = new Configuration();
      FileSystem targetFs = FileSystem.get(URI.create(target), conf);
      OutputStream remoteOutput = targetFs.create(new Path(target));

      // 4096-byte buffer; the trailing true asks copyBytes to close the streams
      IOUtils.copyBytes(localInput, remoteOutput, 4096, true);
      System.out.print("success");
    } catch (Exception failure) {
      System.out.print("fail" + failure.toString());
    }
  }
示例#15
0
  /**
   * Start the JobTracker process, listen on the indicated port.
   *
   * <p>In order: reads tuning constants from {@code conf}, wipes and recreates the system
   * directory, deletes the local {@code SUBDIR} files, starts the inter-tracker RPC server and
   * the info server, and finally starts the three background threads.
   *
   * <p>NOTE(review): {@code this} is handed to the RPC server and the info server before the
   * constructor has finished.
   *
   * @param conf configuration supplying the constants, the file system and the listen address
   * @throws IOException propagated from the file-system and server calls
   */
  JobTracker(Configuration conf) throws IOException {
    //
    // Grab some static constants
    //
    maxCurrentTasks = conf.getInt("mapred.tasktracker.tasks.maximum", 2);
    RETIRE_JOB_INTERVAL = conf.getLong("mapred.jobtracker.retirejob.interval", 24 * 60 * 60 * 1000);
    RETIRE_JOB_CHECK_INTERVAL = conf.getLong("mapred.jobtracker.retirejob.check", 60 * 1000);
    TASK_ALLOC_EPSILON = conf.getFloat("mapred.jobtracker.taskalloc.loadbalance.epsilon", 0.2f);
    PAD_FRACTION = conf.getFloat("mapred.jobtracker.taskalloc.capacitypad", 0.1f);
    MIN_SLOTS_FOR_PADDING = 3 * maxCurrentTasks;

    // This is a directory of temporary submission files.  We delete it
    // on startup, and can delete any files that we're done with
    this.conf = conf;
    JobConf jobConf = new JobConf(conf);
    this.systemDir = jobConf.getSystemDir();
    this.fs = FileSystem.get(conf);
    FileUtil.fullyDelete(fs, systemDir);
    fs.mkdirs(systemDir);

    // Same with 'localDir' except it's always on the local disk.
    jobConf.deleteLocalFiles(SUBDIR);

    // Set ports, start RPC servers, etc.
    InetSocketAddress addr = getAddress(conf);
    this.localMachine = addr.getHostName();
    this.port = addr.getPort();
    this.interTrackerServer = RPC.getServer(this, addr.getPort(), 10, false, conf);
    this.interTrackerServer.start();
    // Log every JVM system property at INFO level
    Properties p = System.getProperties();
    for (Iterator it = p.keySet().iterator(); it.hasNext(); ) {
      String key = (String) it.next();
      String val = (String) p.getProperty(key);
      LOG.info("Property '" + key + "' is " + val);
    }

    // Start the info server on the configured port (default 50030)
    this.infoPort = conf.getInt("mapred.job.tracker.info.port", 50030);
    this.infoServer = new JobTrackerInfoServer(this, infoPort);
    this.infoServer.start();

    this.startTime = System.currentTimeMillis();

    // Start one background thread each for expireTrackers, retireJobs and initJobs
    new Thread(this.expireTrackers).start();
    new Thread(this.retireJobs).start();
    new Thread(this.initJobs).start();
  }
示例#16
0
  /**
   * Ensures that the folder corresponding to {@code insert} exists inside the archive at
   * {@code apkPath}, creating it and any missing ancestors.
   *
   * <p>The folder path inside the archive is the absolute path of {@code insert} with every
   * occurrence of {@code location} removed, normalized.
   *
   * @param apkPath archive opened as a file system
   * @param zip_properties not used by this method
   * @param insert folder on disk whose archive counterpart should exist
   * @param method not used by this method
   * @param location path removed from {@code insert}'s absolute path to obtain the entry path
   * @throws IOException if the archive cannot be opened, written or closed
   */
  private void insertFolder(
      Path apkPath, Map<String, String> zip_properties, File insert, String method, Path location)
      throws AndrolibException, IOException {
    // The cast selects the (Path, ClassLoader) overload; a bare null is ambiguous on JDKs that
    // also offer newFileSystem(Path, Map). try-with-resources closes the file system, so no
    // explicit close call is needed.
    try (FileSystem fs = FileSystems.newFileSystem(apkPath, (ClassLoader) null)) {
      Path root = fs.getPath("/");
      Path folder =
          fs.getPath(root.toString(), insert.getAbsolutePath().replace(location.toString(), ""))
              .normalize();

      // check for folder existing in apkFileSystem; a path that does not exist cannot be a
      // directory, so no further test is needed before creating it
      if (Files.notExists(folder)) {
        Files.createDirectories(folder);
      }
    }
  }
示例#17
0
  /**
   * Copies several sources into one destination directory.
   *
   * <p>A single source is delegated straight to the single-source {@code copy}. Otherwise
   * {@code dst} must be an existing directory; each source is copied in turn and the messages of
   * all {@link IOException}s are collected and thrown together at the end.
   *
   * @return {@code true} only if every individual copy returned {@code true}
   * @throws IOException if {@code dst} is missing or not a directory, or if any copy failed
   */
  public static boolean copy(
      FileSystem srcFS,
      Path[] srcs,
      FileSystem dstFS,
      Path dst,
      boolean deleteSource,
      boolean overwrite,
      Configuration conf)
      throws IOException {
    if (srcs.length == 1) {
      return copy(srcFS, srcs[0], dstFS, dst, deleteSource, overwrite, conf);
    }

    // Check if dest is directory
    if (!dstFS.exists(dst)) {
      throw new IOException("`" + dst + "': specified destination directory " + "doest not exist");
    }
    if (!dstFS.getFileStatus(dst).isDir()) {
      throw new IOException(
          "copying multiple files, but last argument `" + dst + "' is not a directory");
    }

    boolean allCopied = true;
    StringBuilder failures = new StringBuilder();
    for (Path src : srcs) {
      try {
        boolean copied = copy(srcFS, src, dstFS, dst, deleteSource, overwrite, conf);
        if (!copied) {
          allCopied = false;
        }
      } catch (IOException e) {
        // keep going; every failure appends at least the line break
        failures.append(e.getMessage()).append("\n");
      }
    }

    if (failures.length() > 0) {
      throw new IOException(failures.toString());
    }
    return allCopied;
  }
示例#18
0
 /**
  * Clear the entire contents of the cache and delete the backing files. This should only be used
  * when the server is reinitializing, because the users are going to lose their files.
  *
  * <p>Runs while holding the lock on {@code cachedArchives}. A failure to delete one entry's
  * local path is logged at debug level and does not stop the purge.
  */
 public static void purgeCache(Configuration conf, MRAsyncDiskService service) throws IOException {
   synchronized (cachedArchives) {
     LocalFileSystem localFs = FileSystem.getLocal(conf);
     for (CacheStatus status : cachedArchives.values()) {
       try {
         deleteLocalPath(service, localFs, status.localizedLoadPath);
       } catch (IOException ie) {
         LOG.debug("Error cleaning up cache", ie);
       }
     }
     cachedArchives.clear();
   }
 }
示例#19
0
  /**
   * Copy all files in a directory to one output file (merge).
   *
   * <p>Only the direct, non-directory children of {@code srcDir} are merged, in the order
   * returned by {@code listStatus}. When {@code addString} is non-null its UTF-8 bytes are
   * written after each file.
   *
   * @return {@code false} if {@code srcDir} is not a directory; otherwise the result of deleting
   *     {@code srcDir} when {@code deleteSource} is set, or {@code true}
   */
  public static boolean copyMerge(
      FileSystem srcFS,
      Path srcDir,
      FileSystem dstFS,
      Path dstFile,
      boolean deleteSource,
      Configuration conf,
      String addString)
      throws IOException {
    dstFile = checkDest(srcDir.getName(), dstFS, dstFile, false);

    if (!srcFS.getFileStatus(srcDir).isDir()) {
      return false;
    }

    OutputStream merged = dstFS.create(dstFile);
    try {
      for (FileStatus entry : srcFS.listStatus(srcDir)) {
        if (entry.isDir()) {
          continue;
        }
        InputStream part = srcFS.open(entry.getPath());
        try {
          // false: keep both streams open; they are closed by the finally blocks
          IOUtils.copyBytes(part, merged, conf, false);
          if (addString != null) {
            merged.write(addString.getBytes("UTF-8"));
          }
        } finally {
          part.close();
        }
      }
    } finally {
      merged.close();
    }

    return !deleteSource || srcFS.delete(srcDir, true);
  }
 /**
  * The src file is under FS, and the dst is on the local disk. Copy it from FS control to the
  * local dst name. If src and dst are directories, the copyCrc parameter determines whether to
  * copy CRC files.
  *
  * <p>For a single file, any checksum file already present next to the local copy is removed
  * first; the source's checksum file is then copied only when {@code copyCrc} is set and it
  * exists.
  */
 public void copyToLocalFile(Path src, Path dst, boolean copyCrc) throws IOException {
   if (fs.isDirectory(src)) {
     // directory: descend into every entry
     for (FileStatus child : listStatus(src)) {
       Path childPath = child.getPath();
       copyToLocalFile(childPath, new Path(dst, childPath.getName()), copyCrc);
     }
     return;
   }

   // source is a file
   fs.copyToLocalFile(src, dst);
   FileSystem rawLocal = getLocal(getConf()).getRawFileSystem();
   Path localTarget = rawLocal.isDirectory(dst) ? new Path(dst, src.getName()) : dst;

   Path localCrc = getChecksumFile(localTarget);
   if (rawLocal.exists(localCrc)) { // remove old local checksum file
     rawLocal.delete(localCrc, true);
   }

   Path remoteCrc = getChecksumFile(src);
   if (copyCrc && fs.exists(remoteCrc)) { // copy checksum file
     fs.copyToLocalFile(remoteCrc, localCrc);
   }
 }
 /**
  * Walks {@code path} recursively and adds every file found as an input path of {@code job}.
  *
  * <p>Each visited entry is echoed to standard error, prefixed with {@code file:} or
  * {@code dir:}.
  *
  * @throws RuntimeException wrapping any {@link IOException} raised while listing
  */
 public static void recursePath(Configuration conf, Path path, Job job) {
   try {
     FileSystem fs = path.getFileSystem(conf);
     FileStatus[] children = fs.listStatus(path);
     if (children == null) {
       return;
     }
     for (FileStatus child : children) {
       Path childPath = child.getPath();
       if (!fs.isFile(childPath)) {
         System.err.println("dir:" + childPath.toString());
         recursePath(conf, childPath, job);
         continue;
       }
       // connection times out otherwise
       System.err.println("file:" + childPath.toString());
       FileInputFormat.addInputPath(job, childPath);
     }
   } catch (IOException e) {
     // shouldn't be here
     throw new RuntimeException(e);
   }
 }
  /**
   * Extracts the features of a local query image and stores them as JSON in HDFS.
   *
   * <p>The image is read from {@code LOCAL_USER_DIR + ID + INPUT + "/" + filename}, converted to
   * gray scale and handed to {@code FeatureExtraction.extractFeature}. The result is written,
   * UTF-8 encoded, to {@code HDFS_HOME + USER + ID + INPUT + "/" + <base name>.json}, where the
   * base name is {@code filename} without its last extension (or the whole name if it has none).
   *
   * @param filename name of the query image inside the local input directory
   * @param job job whose configuration selects the target file system
   * @throws IOException if the feature file cannot be created or written
   */
  public static void extractQueryFeatures2HDFS(String filename, Job job) throws IOException {

    // Read the local image.jpg as a Mat
    // NOTE(review): a CvType constant is passed as imread's second argument - confirm that this
    // is the intended read flag.
    Mat query_mat_float =
        Highgui.imread(LOCAL_USER_DIR + ID + INPUT + "/" + filename, CvType.CV_32FC3);
    // Convert RGB to GRAY
    Mat query_gray = new Mat();
    Imgproc.cvtColor(query_mat_float, query_gray, Imgproc.COLOR_RGB2GRAY);
    // Convert the float type to unsigned integer(required by SIFT)
    Mat query_mat_byte = new Mat();
    query_gray.convertTo(query_mat_byte, CvType.CV_8UC3);
    //        // Resize the image to 1/FACTOR both width and height
    //        Mat query_mat_byte = FeatureExtraction.resize(query_mat_byte);

    // Extract the feature from the (Mat)image
    Mat query_features = FeatureExtraction.extractFeature(query_mat_byte);

    System.out.println(PREFIX + "Extracting the query image feature...");
    System.out.println("query_mat(float,color):" + query_mat_float);
    System.out.println("query_mat(float,gray):" + query_gray);
    System.out.println("query_mat(byte,gray):" + query_mat_byte);
    System.out.println("query_mat_features:" + query_features);
    System.out.println();

    // Store the feature to the hdfs in order to use it later in different map tasks
    System.out.println(PREFIX + "Generating the feature file for the query image in HDFS...");
    FileSystem fs = FileSystem.get(job.getConfiguration());
    // A name without any '.' used to fail with StringIndexOutOfBoundsException; keep it whole.
    int extensionStart = filename.lastIndexOf(".");
    String baseName = extensionStart >= 0 ? filename.substring(0, extensionStart) : filename;
    String featureFileName = baseName + ".json";
    FSDataOutputStream fsDataOutputStream =
        fs.create(new Path(HDFS_HOME + USER + ID + INPUT + "/" + featureFileName));
    // try-with-resources closes the writer (and the stream it wraps) even if writing fails
    try (BufferedWriter bw =
        new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8))) {
      bw.write(FeatureExtraction.mat2json(query_features));
    }
    System.out.println(PREFIX + "Query feature extraction finished...");
    System.out.println();
  }
示例#23
0
  /**
   * Opens {@code filepath} for line-oriented reading, starting {@code offset} bytes in.
   *
   * <p>A directory is read through {@code OpenMultiplePartsWithOffset}; a plain file is opened
   * and seeked directly. For a positive offset the first (possibly partial) line is discarded.
   *
   * @return the reader, or {@code null} after printing the stack trace if an
   *     {@link IOException} occurred (with assertions enabled that path fails the assertion
   *     instead)
   */
  public BufferedReader loadDataFromFile(String filepath, long offset) {
    try {
      Path location = new Path(filepath);
      FileSystem fs = FileSystem.get(fsConf);
      boolean startsMidFile = offset > 0;

      InputStreamReader decoder;
      if (!fs.isDirectory(location)) { // single file
        FSDataInputStream single = fs.open(location);
        if (startsMidFile) {
          single.seek(offset);
        }
        decoder = new InputStreamReader(single);
      } else { // multiple parts
        decoder = new InputStreamReader(OpenMultiplePartsWithOffset(fs, location, offset));
      }

      BufferedReader reader = new BufferedReader(decoder);
      if (startsMidFile) {
        reader.readLine(); // skip first line in case of seek
      }
      return reader;
    } catch (IOException e) {
      // also covers FileNotFoundException, which is an IOException
      e.printStackTrace();
    }
    assert false : "Should not reach here!";
    return null;
  }
示例#24
0
 /**
  * Returns the relative path of the directory this cache will be localized in.
  *
  * <p>The result is a prefix followed by the path of {@code cache}. The prefix is the first
  * non-null value among: the cache URI's host, the cache URI's scheme, the default file system
  * URI's host, the default file system URI's scheme. Every {@code ":/"} in the result is
  * replaced by {@code "/"}.
  *
  * @param cache URI of the cache file
  * @param conf configuration used to look up the default file system when needed
  */
 public static String makeRelative(URI cache, Configuration conf) throws IOException {
   String prefix = cache.getHost();
   if (prefix == null) {
     prefix = cache.getScheme();
   }
   if (prefix == null) {
     // fall back to the default file system's URI
     URI defaultUri = FileSystem.get(conf).getUri();
     String defaultHost = defaultUri.getHost();
     prefix = (defaultHost != null) ? defaultHost : defaultUri.getScheme();
   }

   String relative = prefix + cache.getPath();
   return relative.replace(":/", "/"); // remove windows device colon
 }
示例#25
0
  /**
   * Copy files between FileSystems.
   *
   * <p>The destination is first resolved through {@code checkDest}. A source directory is
   * copied entry by entry after {@code checkDependencies} has been run; the results of those
   * nested copies are not inspected.
   *
   * @return {@code false} if the destination directory could not be created; otherwise the
   *     result of deleting the source when {@code deleteSource} is set, or {@code true}
   * @throws IOException if {@code src} is neither a directory nor a file, or an I/O call fails
   */
  public static boolean copy(
      FileSystem srcFS,
      Path src,
      FileSystem dstFS,
      Path dst,
      boolean deleteSource,
      boolean overwrite,
      Configuration conf)
      throws IOException {
    dst = checkDest(src.getName(), dstFS, dst, overwrite);

    if (srcFS.getFileStatus(src).isDir()) {
      checkDependencies(srcFS, src, dstFS, dst);
      if (!dstFS.mkdirs(dst)) {
        return false;
      }
      for (FileStatus child : srcFS.listStatus(src)) {
        Path childPath = child.getPath();
        Path childTarget = new Path(dst, childPath.getName());
        copy(srcFS, childPath, dstFS, childTarget, deleteSource, overwrite, conf);
      }
    } else if (srcFS.isFile(src)) {
      InputStream source = null;
      OutputStream target = null;
      try {
        source = srcFS.open(src);
        target = dstFS.create(dst, overwrite);
        IOUtils.copyBytes(source, target, conf, true);
      } catch (IOException failure) {
        // release whatever was opened before passing the failure on
        IOUtils.closeStream(target);
        IOUtils.closeStream(source);
        throw failure;
      }
    } else {
      throw new IOException(src.toString() + ": No such file or directory");
    }

    return !deleteSource || srcFS.delete(src, true);
  }
示例#26
0
 /**
  * Confirms that an already localized cache entry is still usable and returns its local path.
  *
  * @return {@code cacheStatus.localizedLoadPath}
  * @throws IOException if {@code ifExistsAndFresh} reports the entry as not fresh
  */
 private static Path checkCacheStatusValidity(
     Configuration conf,
     URI cache,
     long confFileStamp,
     CacheStatus cacheStatus,
     FileStatus fileStatus,
     boolean isArchive)
     throws IOException {
   FileSystem fs = FileSystem.get(cache, conf);
   boolean fresh = ifExistsAndFresh(conf, fs, cache, confFileStamp, cacheStatus, fileStatus);
   if (!fresh) {
     // Has to be
     throw new IOException(
         "Stale cache file: " + cacheStatus.localizedLoadPath + " for cache-file: " + cache);
   }

   String message =
       String.format(
           "Using existing cache of %s->%s", cache.toString(), cacheStatus.localizedLoadPath);
   LOG.info(message);
   return cacheStatus.localizedLoadPath;
 }
示例#27
0
 /**
  * Removes every cache entry whose reference count is zero and deletes its files in the
  * background.
  *
  * <p>The entries are unlinked from {@code cachedArchives} while holding its lock; the file
  * deletion itself is handed to a newly started {@code CacheFileCleanTask} thread.
  */
 private static void deleteCache(Configuration conf, MRAsyncDiskService asyncDiskService)
     throws IOException {
   List<CacheStatus> unreferenced = new LinkedList<CacheStatus>();

   // try deleting cache Status with refcount of zero
   synchronized (cachedArchives) {
     Iterator<CacheStatus> statuses = cachedArchives.values().iterator();
     while (statuses.hasNext()) {
       CacheStatus status = statuses.next();
       if (status.refcount != 0) {
         continue;
       }
       // delete this cache entry from the global list
       // and mark the localized file for deletion
       unreferenced.add(status);
       statuses.remove();
     }
   }

   // do the deletion asynchronously, after releasing the global lock
   new Thread(new CacheFileCleanTask(asyncDiskService, FileSystem.getLocal(conf), unreferenced))
       .start();
 }
  /**
   * Writes task output, commits the task and the job, and compares the committed file's content
   * with the expected lines.
   */
  @SuppressWarnings("unchecked")
  public void testCommitter() throws Exception {
    JobConf conf = new JobConf();
    setConfForFileOutputCommitter(conf);
    JobContext jobContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext taskContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();
    FileOutputFormat.setWorkOutputPath(conf, committer.getTempTaskOutputPath(taskContext));

    committer.setupJob(jobContext);
    committer.setupTask(taskContext);
    String fileName = "test.txt";

    // A reporter that does nothing
    Reporter nullReporter = Reporter.NULL;

    // write output
    FileSystem localFs = FileSystem.getLocal(conf);
    TextOutputFormat format = new TextOutputFormat();
    RecordWriter recordWriter = format.getRecordWriter(localFs, conf, fileName, nullReporter);
    writeOutput(recordWriter, nullReporter);

    // do commit
    committer.commitTask(taskContext);
    committer.commitJob(jobContext);

    // validate output: one line per expected record
    File committedFile = new File(new Path(outDir, fileName).toString());
    StringBuilder expected = new StringBuilder();
    expected.append(key1).append('\t').append(val1).append("\n");
    expected.append(val1).append("\n");
    expected.append(val2).append("\n");
    expected.append(key2).append("\n");
    expected.append(key1).append("\n");
    expected.append(key2).append('\t').append(val2).append("\n");
    String actual = UtilsForTests.slurp(committedFile);
    assertEquals(actual, expected.toString());

    FileUtil.fullyDelete(new File(outDir.toString()));
  }
  /**
   * Checks that {@code SequenceFileInputFormat} splits cover every record exactly once.
   *
   * <p>For a series of randomly stepped record counts below {@code MAX_LENGTH}, a sequence file
   * with keys {@code 0..length-1} is written. The file is then split three times with a random
   * requested split count, and every split is read back: a key seen twice fails the test, as
   * does a final count of distinct keys that differs from {@code length}.
   */
  public void testFormat() throws Exception {
    JobConf job = new JobConf(conf);
    FileSystem fs = FileSystem.getLocal(conf);
    Path dir = new Path(System.getProperty("test.build.data", ".") + "/mapred");
    Path file = new Path(dir, "test.seq");

    Reporter reporter = Reporter.NULL;

    // The seed is drawn separately so it can be logged (see the disabled line below).
    int seed = new Random().nextInt();
    // LOG.info("seed = "+seed);
    Random random = new Random(seed);

    // start from an empty input directory
    fs.delete(dir, true);

    FileInputFormat.setInputPaths(job, dir);

    // for a variety of lengths
    for (int length = 0; length < MAX_LENGTH; length += random.nextInt(MAX_LENGTH / 10) + 1) {

      // LOG.info("creating; entries = " + length);

      // create a file with length entries
      SequenceFile.Writer writer =
          SequenceFile.createWriter(fs, conf, file, IntWritable.class, BytesWritable.class);
      try {
        for (int i = 0; i < length; i++) {
          // key is the record index; value is 0-9 random bytes
          IntWritable key = new IntWritable(i);
          byte[] data = new byte[random.nextInt(10)];
          random.nextBytes(data);
          BytesWritable value = new BytesWritable(data);
          writer.append(key, value);
        }
      } finally {
        writer.close();
      }

      // try splitting the file in a variety of sizes
      InputFormat<IntWritable, BytesWritable> format =
          new SequenceFileInputFormat<IntWritable, BytesWritable>();
      IntWritable key = new IntWritable();
      BytesWritable value = new BytesWritable();
      for (int i = 0; i < 3; i++) {
        int numSplits = random.nextInt(MAX_LENGTH / (SequenceFile.SYNC_INTERVAL / 20)) + 1;
        // LOG.info("splitting: requesting = " + numSplits);
        InputSplit[] splits = format.getSplits(job, numSplits);
        // LOG.info("splitting: got =        " + splits.length);

        // check each split; bits records which keys have been seen across all splits
        BitSet bits = new BitSet(length);
        for (int j = 0; j < splits.length; j++) {
          RecordReader<IntWritable, BytesWritable> reader =
              format.getRecordReader(splits[j], job, reporter);
          try {
            // count is only referenced by the disabled logging below
            int count = 0;
            while (reader.next(key, value)) {
              // if (bits.get(key.get())) {
              // LOG.info("splits["+j+"]="+splits[j]+" : " +
              // key.get());
              // LOG.info("@"+reader.getPos());
              // }
              assertFalse("Key in multiple partitions.", bits.get(key.get()));
              bits.set(key.get());
              count++;
            }
            // LOG.info("splits["+j+"]="+splits[j]+" count=" +
            // count);
          } finally {
            reader.close();
          }
        }
        // every key written must have been read from some split
        assertEquals("Some keys in no partition.", length, bits.cardinality());
      }
    }
  }
  /**
   * Configures and runs the feature-matching MapReduce job for a single query image.
   *
   * <p>The arguments are validated first (argument count, existence of the user's local
   * directory, jpg/jpeg extension); any validation failure prints a message and terminates the
   * JVM with exit status 1. After validation the query image's features are extracted, the path of
   * the resulting feature file is published to the job configuration under
   * {@code "featureFilePath"}, and every entry of the features directory is registered as job
   * input.
   *
   * @param args {@code args[0]} is the user ID, {@code args[1]} is the query image file name
   *     (must end in {@code .jpg} or {@code .jpeg}, case-insensitive)
   * @return 0 if the job completed successfully, 1 otherwise
   * @throws IOException if the features directory contains no entries, or on file system errors
   * @throws ClassNotFoundException propagated from {@code Job.waitForCompletion}
   * @throws InterruptedException propagated from {@code Job.waitForCompletion}
   */
  public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

    if (args.length != 2) {
      System.out.println("Usage: FeatureMatching ID <inputName.jpeg/inputName.jpg>");
      System.exit(1);
    }

    // Timestamp used to give every run its own output directory.
    String time = new SimpleDateFormat("yyyy-MM-dd_HH-mm-ss", Locale.US).format(new Date());

    Job job = Job.getInstance();

    ID = "/" + args[0];
    // Locale.ROOT keeps the lowercasing independent of the JVM's default locale.
    String filename = args[1].toLowerCase(Locale.ROOT);
    System.out.println("current filename:" + filename);

    // Detect illegal username (if the username dir doesn't exist)
    File userPath = new File(LOCAL_USER_DIR + ID);
    if (!userPath.exists()) {
      System.out.println("Unauthorized username!!!\nExiting......");
      System.exit(1);
    }

    // Check the file type before doing any work. Only jpeg/jpg images are supported.
    // A name without any '.' has no extension and is rejected here instead of failing later
    // with a StringIndexOutOfBoundsException.
    int dotIndex = filename.lastIndexOf('.');
    String type = dotIndex < 0 ? "" : filename.substring(dotIndex);
    if (!(type.equals(".jpg") || type.equals(".jpeg"))) {
      System.out.println("Image type not supported!!!\nExiting");
      System.exit(1);
    }

    // Preprocess the input image from the local dir: /local.../user/ID/input/image.jpg
    // NOTE(review): judging by the helper's name and the path configured below, the extracted
    // features end up on HDFS under hdfs://.../user/ID/input/ -- confirm against the helper.
    extractQueryFeatures2HDFS(filename, job);

    // Publish the location of the query's feature file to the tasks via the configuration.
    String featureFileName = filename.substring(0, dotIndex) + ".json";
    job.getConfiguration()
        .set("featureFilePath", HDFS_HOME + USER + ID + INPUT + "/" + featureFileName);

    // Input: hdfs://.../features/
    // The feature dir is a location of all features extracted from the database
    String inputPathStr = HDFS_HOME + FEATURES;
    // Output: hdfs://.../user/ID/output/<timestamp>
    String outputPathStr = HDFS_HOME + USER + ID + OUTPUT + "/" + time;

    job.setInputFormatClass(KeyValueTextInputFormat.class);

    // Collect every entry directly under the features dir into a comma-separated input list.
    FileSystem fs = FileSystem.get(job.getConfiguration());
    FileStatus[] statuses = fs.listStatus(new Path(inputPathStr));
    if (statuses == null || statuses.length == 0) {
      throw new IOException("No feature files found under " + inputPathStr);
    }
    StringBuilder inputPaths = new StringBuilder();
    for (FileStatus fileStatus : statuses) {
      if (inputPaths.length() > 0) {
        inputPaths.append(',');
      }
      inputPaths.append(fileStatus.getPath());
    }

    job.setJarByClass(FeatureMatching.class);
    job.setMapperClass(FeatureMatchMapper.class);
    job.setReducerClass(FeatureMatchReducer.class);

    // only need one reducer to collect the result
    job.setNumReduceTasks(1);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    // The inputs may be directories, so enable recursive input listing.
    FileInputFormat.setInputDirRecursive(job, true);
    // TODO: omit _SUCCESS files with a PathFilter. FileInputFormat.setInputPathFilter expects a
    // class implementing the PathFilter interface; an earlier attempt with MyPathFilter did not
    // work and was disabled.
    FileInputFormat.setInputPaths(job, inputPaths.toString());
    FileOutputFormat.setOutputPath(job, new Path(outputPathStr));

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
  }