/**
 * Opens the file behind {@code inSplit} and wraps it in the decompressing
 * stream selected by {@link ConfigConstants#CONF_INPUT_COMPRESSION_CODEC}
 * (defaults to {@code ZIP} when the property is unset).
 *
 * <p>For {@code GZIP} input the key is derived immediately from the file path,
 * with a trailing {@code .gz} / {@code .gzip} extension removed; if no URI can
 * be built from the path the key is set to {@code null}. For {@code ZIP} input
 * only the stream is prepared here.
 *
 * <p>An unsupported codec is logged as an error and leaves {@code zipIn}
 * untouched; the underlying file stream is closed in that case.
 *
 * @param inSplit the split to read; must be a {@link FileSplit}
 * @throws IOException if the file cannot be opened or the GZIP header cannot
 *         be read
 * @throws IllegalArgumentException if the configured codec name does not match
 *         any {@link CompressionCodec} constant
 */
protected void initStream(InputSplit inSplit) throws IOException {
    file = ((FileSplit) inSplit).getPath();

    // Resolve the codec before opening the file so that an invalid codec name
    // cannot leak an open stream. Locale.ROOT keeps the enum lookup independent
    // of the JVM default locale (e.g. Turkish upper-cases 'i' to a dotted 'İ').
    String codecString = conf.get(ConfigConstants.CONF_INPUT_COMPRESSION_CODEC,
            CompressionCodec.ZIP.toString()).toUpperCase(java.util.Locale.ROOT);
    codec = CompressionCodec.valueOf(codecString);

    FSDataInputStream fileIn = fs.open(file);
    switch (codec) {
    case ZIP:
        zipIn = new ZipInputStream(fileIn);
        break;
    case GZIP:
        try {
            // The GZIPInputStream constructor reads the header and may throw.
            zipIn = new GZIPInputStream(fileIn);
        } catch (IOException e) {
            try {
                fileIn.close();
            } catch (IOException closeError) {
                e.addSuppressed(closeError);
            }
            throw e;
        }
        String uri = makeURIFromPath(file);
        if (uri == null) {
            key = null;
        } else {
            String lowerUri = uri.toLowerCase(java.util.Locale.ROOT);
            if (lowerUri.endsWith(".gz") || lowerUri.endsWith(".gzip")) {
                // Drop the compression extension from the document URI.
                uri = uri.substring(0, uri.lastIndexOf('.'));
            }
            setKey(uri);
        }
        break;
    default:
        String error = "Unsupported codec: " + codec.name();
        LOG.error(error, new UnsupportedOperationException(error));
        // Nothing will ever read from this stream; do not leave it open.
        fileIn.close();
    }
}
/**
 * Advances to the next record of the compressed input.
 *
 * <ul>
 * <li>If no stream is open, clears {@code hasNext} and returns {@code false}.</li>
 * <li>{@code ZIP}: reads entries until one whose reported size is not 0 is
 *     found. The key is built from the archive path and entry name and the
 *     value is set from the entry size; if no URI can be built the key is set
 *     to {@code null} and the value is left untouched. When the archive is
 *     exhausted and {@code iterator} offers another split, the reader is
 *     closed, re-initialised on that split, and the search continues.</li>
 * <li>{@code GZIP}: produces a single record via {@code setValue(0)}, closes
 *     and clears the stream, clears {@code hasNext}, and returns
 *     {@code true}.</li>
 * </ul>
 *
 * @return {@code true} if a record was produced, {@code false} when the input
 *         is exhausted
 * @throws IOException if reading or closing the underlying stream fails
 * @throws InterruptedException declared by the overridden method
 * @throws UnsupportedOperationException if the codec is neither ZIP nor GZIP
 */
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    // Iterate rather than recurse when moving to the next split, so a long
    // run of archives without usable entries cannot exhaust the call stack.
    while (true) {
        if (zipIn == null) {
            hasNext = false;
            return false;
        }

        if (codec == CompressionCodec.ZIP) {
            ZipInputStream zis = (ZipInputStream) zipIn;
            ZipEntry zipEntry;
            while ((zipEntry = zis.getNextEntry()) != null) {
                // Skip entries reporting size 0. An unknown size is reported
                // by ZipEntry#getSize as -1 and is therefore not skipped.
                if (zipEntry.getSize() == 0) {
                    continue;
                }
                String uri = makeURIForZipEntry(file, zipEntry.getName());
                if (uri != null) {
                    setKey(uri);
                    setValue(zipEntry.getSize());
                } else {
                    key = null;
                }
                return true;
            }
        } else if (codec == CompressionCodec.GZIP) {
            // NOTE(review): after this branch zipIn is null, so the next call
            // returns false at the guard above without consulting iterator;
            // confirm that GZIP input is never expected to span several splits.
            setValue(0);
            zipIn.close();
            zipIn = null;
            hasNext = false;
            return true;
        } else {
            throw new UnsupportedOperationException("Unsupported codec: " + codec.name());
        }

        // Current ZIP archive is exhausted: move on to the next split, if any.
        if (iterator == null || !iterator.hasNext()) {
            hasNext = false;
            return false;
        }
        close();
        initStream(iterator.next());
    }
}