/** * Check whether the contents of src and dst are the same. * * <p>Return false if dstpath does not exist * * <p>If the files have different sizes, return false. * * <p>If the files have the same sizes, the file checksums will be compared. * * <p>When file checksum is not supported in any of file systems, two files are considered as the * same if they have the same size. */ private static boolean sameFile( FileSystem srcfs, FileStatus srcstatus, FileSystem dstfs, Path dstpath) throws IOException { FileStatus dststatus; try { dststatus = dstfs.getFileStatus(dstpath); } catch (FileNotFoundException fnfe) { return false; } // same length? if (srcstatus.getLen() != dststatus.getLen()) { return false; } // compare checksums try { final FileChecksum srccs = srcfs.getFileChecksum(srcstatus.getPath()); final FileChecksum dstcs = dstfs.getFileChecksum(dststatus.getPath()); // return true if checksum is not supported // (i.e. some of the checksums is null) return srccs == null || dstcs == null || srccs.equals(dstcs); } catch (FileNotFoundException fnfe) { return false; } }
/** * Populates FileIndexDescriptor with common things like name, checksum, etc. * * @param path * @return * @throws IOException */ protected FileIndexDescriptor buildFileIndexDescriptor(Path path) throws IOException { FileIndexDescriptor fid = new FileIndexDescriptor(); fid.setSourcePath(path.toString()); fid.setDocType(getExpectedDocType()); FileChecksum cksum = path.getFileSystem(getConf()).getFileChecksum(path); com.twitter.elephanttwin.gen.FileChecksum fidCksum = new com.twitter.elephanttwin.gen.FileChecksum( cksum.getAlgorithmName(), ByteBuffer.wrap(cksum.getBytes()), cksum.getLength()); fid.setChecksum(fidCksum); fid.setIndexedFields(getIndexedFields()); fid.setIndexType(getIndexType()); fid.setIndexVersion(getIndexVersion()); return fid; }
/** * Check whether the contents of src and dst are the same. * * <p>Return false if dstpath does not exist * * <p>If the files have different sizes, return false. * * <p>If the files have the same sizes, the file checksums will be compared. * * <p>When file checksum is not supported in any of file systems, two files are considered as the * same if they have the same size. */ private static boolean sameFile( FileSystem srcfs, FileStatus srcstatus, FileSystem dstfs, Path dstpath) throws IOException { FileStatus dststatus; try { dststatus = dstfs.getFileStatus(dstpath); } catch (FileNotFoundException fnfe) { return false; } // same length? if (srcstatus.getLen() != dststatus.getLen()) { return false; } // get src checksum final FileChecksum srccs; try { srccs = srcfs.getFileChecksum(srcstatus.getPath()); } catch (FileNotFoundException fnfe) { /* * Two possible cases: * (1) src existed once but was deleted between the time period that * srcstatus was obtained and the try block above. * (2) srcfs does not support file checksum and (incorrectly) throws * FNFE, e.g. some previous versions of HftpFileSystem. * For case (1), it is okay to return true since src was already deleted. * For case (2), true should be returned. */ return true; } // compare checksums try { final FileChecksum dstcs = dstfs.getFileChecksum(dststatus.getPath()); // return true if checksum is not supported // (i.e. some of the checksums is null) return srccs == null || dstcs == null || srccs.equals(dstcs); } catch (FileNotFoundException fnfe) { return false; } }
/** * Compare the checksums of the hdfs file as well as the local copied file. * * @author [email protected] * @date Fri Jan 27 06:06:00 2012 */ boolean compareChecksums(FileSystem fs, Path p, String sFsPath) { try { // get hdfs file info FileStatus stat = fs.getFileStatus(p); // get HDFS checksum FileChecksum ck = fs.getFileChecksum(p); String sCk, sCkShort; if (ck == null) { sCk = sCkShort = "<null>"; } else { sCk = ck.toString(); sCkShort = sCk.replaceAll("^.*:", ""); } // System.out.println(p.toUri().getPath() + " len=" + stat.getLen() // + " " + stat.getOwner() + "/" + stat.getGroup() // + " checksum=" + sCk); // find the local file File fLocal = new File(sFsPath); if (!fLocal.exists()) { System.out.println("CHECKSUM-ERROR: file does not exist: " + sFsPath); return false; } if (!fLocal.isFile()) { System.out.println("CHECKSUM-ERROR: path is not a file: " + sFsPath); return false; } if (stat.getLen() != fLocal.length()) { System.out.println( "CHECKSUM-ERROR: length mismatch: " + sFsPath + " hdfslen=" + stat.getLen() + " fslen=" + fLocal.length()); return false; } // get local fs checksum FileChecksum ckLocal = getLocalFileChecksum(sFsPath); if (ckLocal == null) { System.out.println("ERROR Failed to get checksum for local file " + sFsPath); return false; } // compare checksums as a string, after stripping the // algorithm name from the beginning String sCkLocal = ckLocal.toString(); String sCkLocalShort = sCkLocal.replaceAll("^.*:", ""); if (false == sCkShort.equals(sCkLocalShort)) { System.out.println( "CHECKSUM-ERROR: checksum mismatch: " + sFsPath + "\nhdfs = " + sCk + "\nlocal= " + sCkLocal); return false; } return true; } catch (IOException e) { System.out.println("CHECKSUM-ERROR: " + sFsPath + " exception " + e.toString()); } return false; }