/**
 * Creates an {@link S3UnderFileSystem} whose bucket is taken from the host component of the
 * given URI, translating the optional Alluxio S3 settings into JetS3t properties before the
 * REST client is built.
 *
 * @param uri the {@link AlluxioURI} for this UFS
 * @param conf the configuration for Alluxio
 * @param awsCredentials AWS Credentials configuration for S3 Access
 * @throws ServiceException when a connection to S3 could not be created
 */
public S3UnderFileSystem(AlluxioURI uri, Configuration conf, AWSCredentials awsCredentials)
    throws ServiceException {
  super(uri, conf);
  mBucketName = uri.getHost();

  Jets3tProperties jets3tProps = new Jets3tProperties();

  // Explicit proxy settings: turn off auto-detection and use the configured host and port.
  if (conf.containsKey(Constants.UNDERFS_S3_PROXY_HOST)) {
    jets3tProps.setProperty("httpclient.proxy-autodetect", "false");
    jets3tProps.setProperty(
        "httpclient.proxy-host", conf.get(Constants.UNDERFS_S3_PROXY_HOST));
    jets3tProps.setProperty(
        "httpclient.proxy-port", conf.get(Constants.UNDERFS_S3_PROXY_PORT));
  }

  // Forward the HTTPS-only switch only when it has been set.
  if (conf.containsKey(Constants.UNDERFS_S3_PROXY_HTTPS_ONLY)) {
    boolean httpsOnly = conf.getBoolean(Constants.UNDERFS_S3_PROXY_HTTPS_ONLY);
    jets3tProps.setProperty("s3service.https-only", Boolean.toString(httpsOnly));
  }

  // Custom endpoint: only the port matching the selected scheme is forwarded.
  if (conf.containsKey(Constants.UNDERFS_S3_ENDPOINT)) {
    jets3tProps.setProperty("s3service.s3-endpoint", conf.get(Constants.UNDERFS_S3_ENDPOINT));
    if (!conf.getBoolean(Constants.UNDERFS_S3_PROXY_HTTPS_ONLY)) {
      jets3tProps.setProperty(
          "s3service.s3-endpoint-http-port",
          conf.get(Constants.UNDERFS_S3_ENDPOINT_HTTP_PORT));
    } else {
      jets3tProps.setProperty(
          "s3service.s3-endpoint-https-port",
          conf.get(Constants.UNDERFS_S3_ENDPOINT_HTTPS_PORT));
    }
  }

  if (conf.containsKey(Constants.UNDERFS_S3_DISABLE_DNS_BUCKETS)) {
    jets3tProps.setProperty(
        "s3service.disable-dns-buckets", conf.get(Constants.UNDERFS_S3_DISABLE_DNS_BUCKETS));
  }

  LOG.debug("Initializing S3 underFs with properties: {}", jets3tProps.getProperties());
  mClient = new RestS3Service(awsCredentials, null, null, jets3tProps);

  String bucketUri = Constants.HEADER_S3N + mBucketName;
  mBucketPrefix = PathUtils.normalizePath(bucketUri, PATH_SEPARATOR);
}
/** Tests if list correctly returns file or folder names for a large directory. */ @Test public void listLargeDirectory() throws IOException { LargeDirectoryConfig config = prepareLargeDirectoryTest(); String[] children = config.getChildren(); // Retry for some time to allow list operation eventual consistency for S3 and GCS. // See http://docs.aws.amazon.com/AmazonS3/latest/dev/Introduction.html and // https://cloud.google.com/storage/docs/consistency for more details. // Note: not using CommonUtils.waitFor here because we intend to sleep with a longer interval. String[] results = new String[] {}; for (int i = 0; i < 20; i++) { results = mUfs.list(config.getTopLevelDirectory()); if (children.length == results.length) { break; } CommonUtils.sleepMs(500); } Assert.assertEquals(children.length, results.length); Arrays.sort(results); for (int i = 0; i < children.length; ++i) { Assert.assertTrue( results[i].equals( CommonUtils.stripPrefixIfPresent( children[i], PathUtils.normalizePath(config.getTopLevelDirectory(), "/")))); } }
@Override public String[] list(String path) throws IOException { // if the path not exists, or it is a file, then should return null if (!exists(path) || isFile(path)) { return null; } // Non recursive list path = PathUtils.normalizePath(path, PATH_SEPARATOR); return listInternal(path, false); }
/** * Lists the files in the given path, the paths will be their logical names and not contain the * folder suffix. * * @param path the key to list * @param recursive if true will list children directories as well * @return an array of the file and folder names in this directory * @throws IOException if an I/O error occurs */ private String[] listInternal(String path, boolean recursive) throws IOException { try { path = stripPrefixIfPresent(path); path = PathUtils.normalizePath(path, PATH_SEPARATOR); path = path.equals(PATH_SEPARATOR) ? "" : path; // Gets all the objects under the path, because we have no idea if there are non Alluxio // managed "directories" S3Object[] objs = mClient.listObjects(mBucketName, path, ""); if (recursive) { List<String> ret = new ArrayList<>(); for (S3Object obj : objs) { // Remove parent portion of the key String child = getChildName(obj.getKey(), path); // Prune the special folder suffix child = stripFolderSuffixIfPresent(child); // Only add if the path is not empty (removes results equal to the path) if (!child.isEmpty()) { ret.add(child); } } return ret.toArray(new String[ret.size()]); } // Non recursive list Set<String> children = new HashSet<String>(); for (S3Object obj : objs) { // Remove parent portion of the key String child = getChildName(obj.getKey(), path); // Remove any portion after the path delimiter int childNameIndex = child.indexOf(PATH_SEPARATOR); child = childNameIndex != -1 ? child.substring(0, childNameIndex) : child; // Prune the special folder suffix child = stripFolderSuffixIfPresent(child); // Add to the set of children, the set will deduplicate. if (!child.isEmpty()) { children.add(child); } } return children.toArray(new String[children.size()]); } catch (ServiceException e) { LOG.error("Failed to list path {}", path, e); return null; } }
/**
 * Returns the given path with a trailing {@link SwiftUnderFileSystem#FOLDER_SUFFIX}, adding
 * the suffix if it is not already present.
 *
 * @param path URI to the object
 * @return folder path
 */
private String addFolderSuffixIfNotPresent(String path) {
  String folderPath = PathUtils.normalizePath(path, FOLDER_SUFFIX);
  return folderPath;
}
/**
 * Tells whether the given key refers to the root of this bucket, comparing both sides after
 * normalization with the path separator.
 *
 * @param key the key to check
 * @return true if the key is the root, false otherwise
 */
private boolean isRoot(String key) {
  String normalizedKey = PathUtils.normalizePath(key, PATH_SEPARATOR);
  String normalizedRoot =
      PathUtils.normalizePath(Constants.HEADER_S3N + mBucketName, PATH_SEPARATOR);
  return normalizedKey.equals(normalizedRoot);
}