@Override public void run() { String fileName = null; try { // Init Amazon S3 AmazonS3 s3 = new AmazonS3Client( new BasicAWSCredentials( "AKIAI42VQL6SUU5S27SA", "/P3g0/U/hnKgWKRMsRXtG1w+gM+H0dwtLPnuvOBG")); String bucketName = "diaosi-mapreduce"; // Download and Extract fileName = s3Path.substring(s3Path.indexOf("enwiki-20121001-")); fileName = fileName.substring(0, fileName.indexOf("bz2")) + ".xml"; FileOutputStream fout = new FileOutputStream(fileName); URL url = new URL(s3Path); URLConnection conn = url.openConnection(); conn.setConnectTimeout(20000); conn.setReadTimeout(20000); InputStream is = conn.getInputStream(); is.read(); is.read(); CBZip2InputStream fin = new CBZip2InputStream(is); while (true) { int len = fin.read(buf, 0, buf.length); System.out.println(fileName + ": " + len); if (len == -1) break; fout.write(buf, 0, len); } fout.flush(); fout.close(); fin.close(); // Upload to S3 File file = new File(fileName); String key = fileName; s3.putObject(new PutObjectRequest(bucketName, key, file)); } catch (Exception e) { e.printStackTrace(); } }
/**
 * Expands the bzip2-compressed source into {@code dest}.
 *
 * <p>Nothing is done unless {@code source} has a later modification time than
 * {@code dest}. The stream must begin with the bytes {@code 'B'} and
 * {@code 'Z'}; otherwise a {@link BuildException} is thrown. An
 * {@link IOException} during expansion is rethrown as a
 * {@link BuildException} carrying the original exception as its cause.
 */
protected void extract() {
    // Up to date: destination is at least as new as the source.
    if (source.lastModified() <= dest.lastModified()) {
        return;
    }

    log("Expanding " + source.getAbsolutePath() + " to "
        + dest.getAbsolutePath());

    FileOutputStream target = null;
    CBZip2InputStream bzipIn = null;
    InputStream rawIn = null;
    BufferedInputStream bufferedIn = null;
    try {
        target = new FileOutputStream(dest);
        rawIn = srcResource.getInputStream();
        bufferedIn = new BufferedInputStream(rawIn);

        // Consume and verify the two-byte magic. The second byte is only read
        // when the first one matched (short-circuit evaluation).
        if (bufferedIn.read() != 'B' || bufferedIn.read() != 'Z') {
            throw new BuildException("Invalid bz2 file.", getLocation());
        }

        bzipIn = new CBZip2InputStream(bufferedIn, true);
        byte[] chunk = new byte[BUFFER_SIZE];
        // Starts with a zero-length write, then alternates read/write until
        // the decompressor reports end of stream (-1).
        for (int read = 0; read != -1; read = bzipIn.read(chunk, 0, chunk.length)) {
            target.write(chunk, 0, read);
        }
    } catch (IOException ioe) {
        throw new BuildException(
            "Problem expanding bzip2 " + ioe.getMessage(), ioe, getLocation());
    } finally {
        FileUtils.close(bufferedIn);
        FileUtils.close(rawIn);
        FileUtils.close(target);
        FileUtils.close(bzipIn);
    }
}