/**
 * Creates a merger over a fixed number of buckets and registers it as an upstream of the given
 * downstream.
 *
 * <p>One row ordering is built per entry of {@code orderByPositions}, using the entries of
 * {@code reverseFlags} and {@code nullsFirst} at the same index. Each of those orderings is
 * reversed before all of them are combined into a single compound ordering.
 *
 * @param rowDownstream downstream this merger registers itself with; the handle returned by the
 *     registration is kept
 * @param numBuckets number of buckets per merge call; must be greater than 0
 * @param orderByPositions positions passed to the per-column row orderings
 * @param reverseFlags per-column reverse flag, indexed like {@code orderByPositions}
 * @param nullsFirst per-column nulls-first setting, indexed like {@code orderByPositions}
 * @param executor optional executor, stored as given
 * @throws IllegalArgumentException if {@code numBuckets} is not positive
 */
public SortingBucketMerger(
     RowDownstream rowDownstream,
     int numBuckets,
     int[] orderByPositions,
     boolean[] reverseFlags,
     Boolean[] nullsFirst,
     Optional<Executor> executor) {
   Preconditions.checkArgument(numBuckets > 0, "must at least get 1 bucket per merge call");
   this.numBuckets = numBuckets;
   this.executor = executor;

   // Build one reversed ordering per order-by column, walking the three arrays in lockstep.
   final List<Comparator<Row>> reversedOrderings = new ArrayList<>(orderByPositions.length);
   int column = 0;
   for (int position : orderByPositions) {
     OrderingByPosition<Row> columnOrdering =
         OrderingByPosition.rowOrdering(position, reverseFlags[column], nullsFirst[column]);
     reversedOrderings.add(columnOrdering.reverse());
     column++;
   }
   this.ordering = Ordering.compound(reversedOrderings);

   // Registration stays last so that `this` is handed out only after all other state is set.
   this.downstream = rowDownstream.registerUpstream(this);
  }
Esempio n. 2
0
  /**
   * Creates a collector for the given file URI and registers it as an upstream of
   * {@code downstream}.
   *
   * <p>{@code fileUri} handling:
   *
   * <ul>
   *   <li>A value starting with {@code "/"} is treated as a local path and converted with
   *       {@code Paths.get(..).toUri()}.
   *   <li>Anything else is parsed with {@code URI.create}; it must have a scheme, the scheme must
   *       be {@code file} or {@code s3}, and a {@code file} URI's scheme-specific part must start
   *       with {@code "///"}.
   * </ul>
   *
   * <p>If the resulting URI matches {@code HAS_GLOBS_PATTERN}, the first capture group is stored
   * as the pre-glob URI and a predicate matching URIs against the glob (translated to a regex) is
   * created; otherwise the glob predicate is {@code null}.
   *
   * <p>The registration with {@code downstream} is performed as the very last step, so that the
   * downstream never sees a partially initialised collector and no registration is left behind
   * if one of the earlier steps throws.
   *
   * @param fileUri absolute local path or {@code file}/{@code s3} URI, optionally with globs
   * @param inputs inputs wrapped into the row handed on by this collector
   * @param collectorExpressions collector expressions, stored as given
   * @param downstream downstream to register with; the returned handle is kept
   * @param format file format; not used by this constructor
   * @param compression compression name; only {@code "gzip"} (case-insensitive) marks the input
   *     as compressed, {@code null} or any other value does not
   * @param additionalFileInputFactories factories keyed by name, added on top of the built-in
   *     {@code "s3"} and {@code "file"} factories; an entry with the same key replaces the
   *     built-in one
   * @param shared stored as given
   * @param numReaders stored as given
   * @param readerNumber stored as given
   * @throws IllegalArgumentException if the URI is relative, is an invalid {@code file} URI, or
   *     uses an unsupported scheme
   */
  public FileReadingCollector(
      String fileUri,
      List<Input<?>> inputs,
      List<LineCollectorExpression<?>> collectorExpressions,
      RowDownstream downstream,
      FileFormat format,
      String compression,
      Map<String, FileInputFactory> additionalFileInputFactories,
      Boolean shared,
      int numReaders,
      int readerNumber) {
    if (fileUri.startsWith("/")) {
      // using Paths.get().toUri instead of new URI(...) as it also encodes umlauts and other
      // special characters
      this.fileUri = Paths.get(fileUri).toUri();
    } else {
      this.fileUri = URI.create(fileUri);
      if (this.fileUri.getScheme() == null) {
        throw new IllegalArgumentException("relative fileURIs are not allowed");
      }
      if (this.fileUri.getScheme().equals("file")
          && !this.fileUri.getSchemeSpecificPart().startsWith("///")) {
        throw new IllegalArgumentException("Invalid fileURI");
      }
      if (!this.fileUri.getScheme().equals("file") && !this.fileUri.getScheme().equals("s3")) {
        throw new IllegalArgumentException("URI scheme is not supported");
      }
    }
    this.compressed = compression != null && compression.equalsIgnoreCase("gzip");
    this.row = new InputRow(inputs);
    this.collectorExpressions = collectorExpressions;
    // Built-in factories first; caller-supplied factories may override them by key.
    this.fileInputFactoryMap =
        new HashMap<>(
            ImmutableMap.of(
                "s3",
                    new FileInputFactory() {
                      @Override
                      public FileInput create() throws IOException {
                        return new S3FileInput();
                      }
                    },
                "file",
                    new FileInputFactory() {

                      @Override
                      public FileInput create() throws IOException {
                        return new LocalFsFileInput();
                      }
                    }));
    this.fileInputFactoryMap.putAll(additionalFileInputFactories);
    this.shared = shared;
    this.numReaders = numReaders;
    this.readerNumber = readerNumber;
    Matcher hasGlobMatcher = HAS_GLOBS_PATTERN.matcher(this.fileUri.toString());
    if (!hasGlobMatcher.matches()) {
      globPredicate = null;
    } else {
      this.preGlobUri = URI.create(hasGlobMatcher.group(1));
      final Pattern globPattern =
          Pattern.compile(Globs.toUnixRegexPattern(this.fileUri.toString()));
      globPredicate =
          new Predicate<URI>() {
            @Override
            public boolean apply(URI input) {
              return globPattern.matcher(input.toString()).matches();
            }
          };
    }
    // Register last: `this` must not escape before every field above is assigned, and a failure
    // in any earlier step must not leave a registered-but-never-constructed upstream behind.
    this.downstream = downstream.registerUpstream(this);
  }