/**
     * Downloads the bzip2-compressed dump at the URL held in {@code s3Path}, decompresses it
     * into a local {@code .xml} file, and uploads that file to the {@code diaosi-mapreduce}
     * bucket.
     *
     * <p>The local file name, which is also used as the S3 object key, is derived from the part
     * of {@code s3Path} that starts at {@code "enwiki-20121001-"}. All streams are closed whether
     * or not the transfer succeeds. Any failure is reported on standard error and the method
     * then returns normally.
     *
     * <p>NOTE(review): {@code buf} is declared outside this method; if it is shared between
     * several running instances the reads below would race — confirm against the declaration.
     */
    @Override
    public void run() {
      String fileName = null;
      try {
        // Init Amazon S3
        // NOTE(security): these credentials are committed in source. They should be rotated
        // and supplied through the SDK's credential provider chain rather than a literal.
        AmazonS3 s3 =
            new AmazonS3Client(
                new BasicAWSCredentials(
                    "AKIAI42VQL6SUU5S27SA", "/P3g0/U/hnKgWKRMsRXtG1w+gM+H0dwtLPnuvOBG"));
        String bucketName = "diaosi-mapreduce";

        // Derive the local file name / S3 key from the source URL.
        fileName = s3Path.substring(s3Path.indexOf("enwiki-20121001-"));
        fileName = fileName.substring(0, fileName.indexOf("bz2")) + ".xml";

        // Download and Extract
        URL url = new URL(s3Path);
        URLConnection conn = url.openConnection();
        conn.setConnectTimeout(20000);
        conn.setReadTimeout(20000);

        // The output file is opened only after the connection succeeds, so a failed
        // download does not leave an empty file behind.
        try (InputStream is = conn.getInputStream();
            FileOutputStream fout = new FileOutputStream(fileName)) {
          // The two leading magic bytes are consumed here before the decompressor is
          // constructed; reject input that does not start with "BZ" instead of
          // silently discarding whatever was there.
          if (is.read() != 'B' || is.read() != 'Z') {
            throw new java.io.IOException("Invalid bz2 file: " + s3Path);
          }
          try (CBZip2InputStream fin = new CBZip2InputStream(is)) {
            while (true) {
              int len = fin.read(buf, 0, buf.length);
              System.out.println(fileName + ": " + len);
              if (len == -1) {
                break;
              }
              fout.write(buf, 0, len);
            }
            fout.flush();
          }
        }

        // Upload to S3
        File file = new File(fileName);
        String key = fileName;
        s3.putObject(new PutObjectRequest(bucketName, key, file));

      } catch (Exception e) {
        // run() cannot propagate checked exceptions; report which input failed.
        System.err.println("Failed to process " + s3Path + " (local file: " + fileName + ")");
        e.printStackTrace();
      }
    }
Example #2
0
  /**
   * Expands the bzip2-compressed source into {@code dest}.
   *
   * <p>Nothing is done unless {@code source} has a later modification time than {@code dest}.
   * The input must begin with the bytes {@code 'B'} and {@code 'Z'}; both are consumed here
   * before the remainder is handed to the decompressing stream.
   *
   * @throws BuildException if the input does not start with "BZ", or if an I/O error occurs
   *     while expanding
   */
  protected void extract() {
    // Up to date: the destination is at least as new as the source.
    if (source.lastModified() <= dest.lastModified()) {
      return;
    }
    log("Expanding " + source.getAbsolutePath() + " to " + dest.getAbsolutePath());

    InputStream rawIn = null;
    BufferedInputStream bufferedIn = null;
    CBZip2InputStream bzIn = null;
    FileOutputStream sink = null;
    try {
      sink = new FileOutputStream(dest);
      rawIn = srcResource.getInputStream();
      bufferedIn = new BufferedInputStream(rawIn);

      // Short-circuit keeps the original read order: the second byte is only read
      // when the first one matched.
      if (bufferedIn.read() != 'B' || bufferedIn.read() != 'Z') {
        throw new BuildException("Invalid bz2 file.", getLocation());
      }

      bzIn = new CBZip2InputStream(bufferedIn, true);
      final byte[] chunk = new byte[BUFFER_SIZE];
      // Write-then-read: the first pass writes zero bytes, and the loop stops as soon
      // as a read reports end of stream.
      for (int n = 0; n != -1; n = bzIn.read(chunk, 0, chunk.length)) {
        sink.write(chunk, 0, n);
      }
    } catch (IOException ioe) {
      throw new BuildException(
          "Problem expanding bzip2 " + ioe.getMessage(), ioe, getLocation());
    } finally {
      FileUtils.close(bufferedIn);
      FileUtils.close(rawIn);
      FileUtils.close(sink);
      FileUtils.close(bzIn);
    }
  }