/**
 * Initializes the merger: validates the bucket count, builds the row ordering used for
 * merging and registers this instance as an upstream of {@code rowDownstream}.
 *
 * <p>One ordering is created per entry of {@code orderByPositions}; the entries of
 * {@code reverseFlags} and {@code nullsFirst} at the same index configure it. The three
 * arrays are therefore read in parallel and must provide at least
 * {@code orderByPositions.length} elements each.
 *
 * @param rowDownstream    downstream this merger registers itself on
 * @param numBuckets       number of buckets per merge call, must be greater than 0
 * @param orderByPositions column positions to order by, most significant first
 * @param reverseFlags     per-position reverse flag, passed on to the position ordering
 * @param nullsFirst       per-position nulls-first flag, passed on to the position ordering
 * @param executor         optional executor, stored as is
 * @throws IllegalArgumentException if {@code numBuckets} is not greater than 0
 */
public SortingBucketMerger(RowDownstream rowDownstream,
                           int numBuckets,
                           int[] orderByPositions,
                           boolean[] reverseFlags,
                           Boolean[] nullsFirst,
                           Optional<Executor> executor) {
    Preconditions.checkArgument(numBuckets > 0, "must at least get 1 bucket per merge call");
    this.numBuckets = numBuckets;
    this.executor = executor;

    final int sortColumnCount = orderByPositions.length;
    final List<Comparator<Row>> reversedColumnOrderings = new ArrayList<>(sortColumnCount);
    int column = 0;
    while (column < sortColumnCount) {
        OrderingByPosition<Row> columnOrdering = OrderingByPosition.rowOrdering(
                orderByPositions[column], reverseFlags[column], nullsFirst[column]);
        // NOTE(review): every per-column ordering is reversed before being combined; the
        // reason is not visible from this constructor - presumably the merge logic relies
        // on the inverted order. Confirm before changing.
        reversedColumnOrderings.add(columnOrdering.reverse());
        column++;
    }
    this.ordering = Ordering.compound(reversedColumnOrderings);

    // Registration happens last, after all other fields have been assigned.
    this.downstream = rowDownstream.registerUpstream(this);
}
public FileReadingCollector( String fileUri, List<Input<?>> inputs, List<LineCollectorExpression<?>> collectorExpressions, RowDownstream downstream, FileFormat format, String compression, Map<String, FileInputFactory> additionalFileInputFactories, Boolean shared, int numReaders, int readerNumber) { if (fileUri.startsWith("/")) { // using Paths.get().toUri instead of new URI(...) as it also encodes umlauts and other // special characters this.fileUri = Paths.get(fileUri).toUri(); } else { this.fileUri = URI.create(fileUri); if (this.fileUri.getScheme() == null) { throw new IllegalArgumentException("relative fileURIs are not allowed"); } if (this.fileUri.getScheme().equals("file") && !this.fileUri.getSchemeSpecificPart().startsWith("///")) { throw new IllegalArgumentException("Invalid fileURI"); } if (!this.fileUri.getScheme().equals("file") && !this.fileUri.getScheme().equals("s3")) { throw new IllegalArgumentException("URI scheme is not supported"); } } this.downstream = downstream.registerUpstream(this); this.compressed = compression != null && compression.equalsIgnoreCase("gzip"); this.row = new InputRow(inputs); this.collectorExpressions = collectorExpressions; this.fileInputFactoryMap = new HashMap<>( ImmutableMap.of( "s3", new FileInputFactory() { @Override public FileInput create() throws IOException { return new S3FileInput(); } }, "file", new FileInputFactory() { @Override public FileInput create() throws IOException { return new LocalFsFileInput(); } })); this.fileInputFactoryMap.putAll(additionalFileInputFactories); this.shared = shared; this.numReaders = numReaders; this.readerNumber = readerNumber; Matcher hasGlobMatcher = HAS_GLOBS_PATTERN.matcher(this.fileUri.toString()); if (!hasGlobMatcher.matches()) { globPredicate = null; } else { this.preGlobUri = URI.create(hasGlobMatcher.group(1)); final Pattern globPattern = Pattern.compile(Globs.toUnixRegexPattern(this.fileUri.toString())); globPredicate = new Predicate<URI>() { @Override public boolean 
apply(URI input) { return globPattern.matcher(input.toString()).matches(); } }; } }