Пример #1
0
  @Test
  public void testAllFieldsWritable() throws IOException {
    Range[] ranges = new Range[] {new Range(new Key("a"), new Key("b"))};
    BatchInputSplit split =
        new BatchInputSplit("table", "1", Arrays.asList(ranges), new String[] {"localhost"});

    Set<Pair<Text, Text>> fetchedColumns = new HashSet<>();

    fetchedColumns.add(new Pair<>(new Text("colf1"), new Text("colq1")));
    fetchedColumns.add(new Pair<>(new Text("colf2"), new Text("colq2")));

    // Fake some iterators
    ArrayList<IteratorSetting> iterators = new ArrayList<>();
    IteratorSetting setting = new IteratorSetting(50, SummingCombiner.class);
    setting.addOption("foo", "bar");
    iterators.add(setting);

    setting = new IteratorSetting(100, WholeRowIterator.class);
    setting.addOption("bar", "foo");
    iterators.add(setting);

    split.setTableName("table");
    split.setAuths(new Authorizations("foo"));
    split.setFetchedColumns(fetchedColumns);
    split.setToken(new PasswordToken("password"));
    split.setPrincipal("root");
    DeprecationUtil.setMockInstance(split, true);
    split.setInstanceName("instance");
    split.setZooKeepers("localhost");
    split.setIterators(iterators);
    split.setLogLevel(Level.WARN);

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(baos);
    split.write(dos);

    BatchInputSplit newSplit = new BatchInputSplit();

    ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
    DataInputStream dis = new DataInputStream(bais);
    newSplit.readFields(dis);

    Assert.assertEquals(split.getRanges(), newSplit.getRanges());
    Assert.assertArrayEquals(split.getLocations(), newSplit.getLocations());

    Assert.assertEquals(split.getTableName(), newSplit.getTableName());
    Assert.assertEquals(split.getAuths(), newSplit.getAuths());
    Assert.assertEquals(split.getFetchedColumns(), newSplit.getFetchedColumns());
    Assert.assertEquals(split.getToken(), newSplit.getToken());
    Assert.assertEquals(split.getPrincipal(), newSplit.getPrincipal());
    Assert.assertEquals(split.getInstanceName(), newSplit.getInstanceName());
    Assert.assertEquals(
        DeprecationUtil.isMockInstanceSet(split), DeprecationUtil.isMockInstanceSet(newSplit));
    Assert.assertEquals(split.getZooKeepers(), newSplit.getZooKeepers());
    Assert.assertEquals(split.getIterators(), newSplit.getIterators());
    Assert.assertEquals(split.getLogLevel(), newSplit.getLogLevel());
  }
  /**
   * Gets the splits of the tables that have been set on the job by reading the metadata table for
   * the specified ranges.
   *
   * @return the splits from the tables based on the ranges.
   * @throws java.io.IOException if a table set on the job doesn't exist or an error occurs
   *     initializing the tablet locator
   */
  @Override
  public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    Level logLevel = getLogLevel(job);
    log.setLevel(logLevel);
    validateOptions(job);

    Random random = new Random();
    LinkedList<InputSplit> splits = new LinkedList<InputSplit>();
    Map<String, InputTableConfig> tableConfigs = getInputTableConfigs(job);
    for (Map.Entry<String, InputTableConfig> tableConfigEntry : tableConfigs.entrySet()) {
      String tableName = tableConfigEntry.getKey();
      InputTableConfig tableConfig = tableConfigEntry.getValue();

      Instance instance = getInstance(job);
      String tableId;
      // resolve table name to id once, and use id from this point forward
      if (DeprecationUtil.isMockInstance(instance)) {
        tableId = "";
      } else {
        try {
          tableId = Tables.getTableId(instance, tableName);
        } catch (TableNotFoundException e) {
          throw new IOException(e);
        }
      }

      Authorizations auths = getScanAuthorizations(job);
      String principal = getPrincipal(job);
      AuthenticationToken token = getAuthenticationToken(job);

      boolean batchScan = InputConfigurator.isBatchScan(CLASS, job);
      boolean supportBatchScan =
          !(tableConfig.isOfflineScan()
              || tableConfig.shouldUseIsolatedScanners()
              || tableConfig.shouldUseLocalIterators());
      if (batchScan && !supportBatchScan)
        throw new IllegalArgumentException(
            "BatchScanner optimization not available for offline scan, isolated, or local iterators");

      boolean autoAdjust = tableConfig.shouldAutoAdjustRanges();
      if (batchScan && !autoAdjust)
        throw new IllegalArgumentException(
            "AutoAdjustRanges must be enabled when using BatchScanner optimization");

      List<Range> ranges =
          autoAdjust ? Range.mergeOverlapping(tableConfig.getRanges()) : tableConfig.getRanges();
      if (ranges.isEmpty()) {
        ranges = new ArrayList<Range>(1);
        ranges.add(new Range());
      }

      // get the metadata information for these ranges
      Map<String, Map<KeyExtent, List<Range>>> binnedRanges =
          new HashMap<String, Map<KeyExtent, List<Range>>>();
      TabletLocator tl;
      try {
        if (tableConfig.isOfflineScan()) {
          binnedRanges = binOfflineTable(job, tableId, ranges);
          while (binnedRanges == null) {
            // Some tablets were still online, try again
            // sleep randomly between 100 and 200 ms
            sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
            binnedRanges = binOfflineTable(job, tableId, ranges);
          }
        } else {
          tl = InputConfigurator.getTabletLocator(CLASS, job, tableId);
          // its possible that the cache could contain complete, but old information about a tables
          // tablets... so clear it
          tl.invalidateCache();

          ClientContext context =
              new ClientContext(
                  getInstance(job),
                  new Credentials(getPrincipal(job), getAuthenticationToken(job)),
                  getClientConfiguration(job));
          while (!tl.binRanges(context, ranges, binnedRanges).isEmpty()) {
            if (!DeprecationUtil.isMockInstance(instance)) {
              if (!Tables.exists(instance, tableId)) throw new TableDeletedException(tableId);
              if (Tables.getTableState(instance, tableId) == TableState.OFFLINE)
                throw new TableOfflineException(instance, tableId);
            }
            binnedRanges.clear();
            log.warn("Unable to locate bins for specified ranges. Retrying.");
            // sleep randomly between 100 and 200 ms
            sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
            tl.invalidateCache();
          }
        }
      } catch (Exception e) {
        throw new IOException(e);
      }

      HashMap<Range, ArrayList<String>> splitsToAdd = null;

      if (!autoAdjust) splitsToAdd = new HashMap<Range, ArrayList<String>>();

      HashMap<String, String> hostNameCache = new HashMap<String, String>();
      for (Map.Entry<String, Map<KeyExtent, List<Range>>> tserverBin : binnedRanges.entrySet()) {
        String ip = tserverBin.getKey().split(":", 2)[0];
        String location = hostNameCache.get(ip);
        if (location == null) {
          InetAddress inetAddress = InetAddress.getByName(ip);
          location = inetAddress.getCanonicalHostName();
          hostNameCache.put(ip, location);
        }
        for (Map.Entry<KeyExtent, List<Range>> extentRanges : tserverBin.getValue().entrySet()) {
          Range ke = extentRanges.getKey().toDataRange();
          if (batchScan) {
            // group ranges by tablet to be read by a BatchScanner
            ArrayList<Range> clippedRanges = new ArrayList<Range>();
            for (Range r : extentRanges.getValue()) clippedRanges.add(ke.clip(r));

            BatchInputSplit split =
                new BatchInputSplit(tableName, tableId, clippedRanges, new String[] {location});
            SplitUtils.updateSplit(split, instance, tableConfig, principal, token, auths, logLevel);

            splits.add(split);
          } else {
            // not grouping by tablet
            for (Range r : extentRanges.getValue()) {
              if (autoAdjust) {
                // divide ranges into smaller ranges, based on the tablets
                RangeInputSplit split =
                    new RangeInputSplit(tableName, tableId, ke.clip(r), new String[] {location});
                SplitUtils.updateSplit(
                    split, instance, tableConfig, principal, token, auths, logLevel);
                split.setOffline(tableConfig.isOfflineScan());
                split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
                split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());

                splits.add(split);
              } else {
                // don't divide ranges
                ArrayList<String> locations = splitsToAdd.get(r);
                if (locations == null) locations = new ArrayList<String>(1);
                locations.add(location);
                splitsToAdd.put(r, locations);
              }
            }
          }
        }
      }

      if (!autoAdjust)
        for (Map.Entry<Range, ArrayList<String>> entry : splitsToAdd.entrySet()) {
          RangeInputSplit split =
              new RangeInputSplit(
                  tableName, tableId, entry.getKey(), entry.getValue().toArray(new String[0]));
          SplitUtils.updateSplit(split, instance, tableConfig, principal, token, auths, logLevel);
          split.setOffline(tableConfig.isOfflineScan());
          split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
          split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());

          splits.add(split);
        }
    }

    return splits.toArray(new InputSplit[splits.size()]);
  }
    /** Initialize a scanner over the given input split using this task attempt configuration. */
    public void initialize(InputSplit inSplit, JobConf job) throws IOException {
      baseSplit = (org.apache.accumulo.core.client.mapreduce.RangeInputSplit) inSplit;
      log.debug("Initializing input split: " + baseSplit.toString());

      Instance instance = baseSplit.getInstance(getClientConfiguration(job));
      if (null == instance) {
        instance = getInstance(job);
      }

      String principal = baseSplit.getPrincipal();
      if (null == principal) {
        principal = getPrincipal(job);
      }

      AuthenticationToken token = baseSplit.getToken();
      if (null == token) {
        token = getAuthenticationToken(job);
      }

      Authorizations authorizations = baseSplit.getAuths();
      if (null == authorizations) {
        authorizations = getScanAuthorizations(job);
      }
      String classLoaderContext = getClassLoaderContext(job);
      String table = baseSplit.getTableName();

      // in case the table name changed, we can still use the previous name for terms of
      // configuration,
      // but the scanner will use the table id resolved at job setup time
      InputTableConfig tableConfig = getInputTableConfig(job, baseSplit.getTableName());

      log.debug("Creating connector with user: "******"Creating scanner for table: " + table);
      log.debug("Authorizations are: " + authorizations);

      if (baseSplit instanceof BatchInputSplit) {
        BatchScanner scanner;
        BatchInputSplit multiRangeSplit = (BatchInputSplit) baseSplit;

        try {
          // Note: BatchScanner will use at most one thread per tablet, currently BatchInputSplit
          // will not span tablets
          int scanThreads = 1;
          scanner =
              instance
                  .getConnector(principal, token)
                  .createBatchScanner(baseSplit.getTableName(), authorizations, scanThreads);
          setupIterators(job, scanner, baseSplit.getTableName(), baseSplit);
          if (null != classLoaderContext) {
            scanner.setClassLoaderContext(classLoaderContext);
          }
        } catch (Exception e) {
          throw new IOException(e);
        }

        scanner.setRanges(multiRangeSplit.getRanges());
        scannerBase = scanner;

      } else if (baseSplit instanceof RangeInputSplit) {
        split = (RangeInputSplit) baseSplit;
        Boolean isOffline = baseSplit.isOffline();
        if (null == isOffline) {
          isOffline = tableConfig.isOfflineScan();
        }

        Boolean isIsolated = baseSplit.isIsolatedScan();
        if (null == isIsolated) {
          isIsolated = tableConfig.shouldUseIsolatedScanners();
        }

        Boolean usesLocalIterators = baseSplit.usesLocalIterators();
        if (null == usesLocalIterators) {
          usesLocalIterators = tableConfig.shouldUseLocalIterators();
        }

        Scanner scanner;

        try {
          if (isOffline) {
            scanner =
                new OfflineScanner(
                    instance,
                    new Credentials(principal, token),
                    baseSplit.getTableId(),
                    authorizations);
          } else if (DeprecationUtil.isMockInstance(instance)) {
            scanner =
                instance
                    .getConnector(principal, token)
                    .createScanner(baseSplit.getTableName(), authorizations);
          } else {
            ClientConfiguration clientConf = getClientConfiguration(job);
            ClientContext context =
                new ClientContext(instance, new Credentials(principal, token), clientConf);
            scanner = new ScannerImpl(context, baseSplit.getTableId(), authorizations);
          }
          if (isIsolated) {
            log.info("Creating isolated scanner");
            scanner = new IsolatedScanner(scanner);
          }
          if (usesLocalIterators) {
            log.info("Using local iterators");
            scanner = new ClientSideIteratorScanner(scanner);
          }
          setupIterators(job, scanner, baseSplit.getTableName(), baseSplit);
        } catch (Exception e) {
          throw new IOException(e);
        }

        scanner.setRange(baseSplit.getRange());
        scannerBase = scanner;
      } else {
        throw new IllegalArgumentException(
            "Can not initialize from " + baseSplit.getClass().toString());
      }

      Collection<Pair<Text, Text>> columns = baseSplit.getFetchedColumns();
      if (null == columns) {
        columns = tableConfig.getFetchedColumns();
      }

      // setup a scanner within the bounds of this split
      for (Pair<Text, Text> c : columns) {
        if (c.getSecond() != null) {
          log.debug("Fetching column " + c.getFirst() + ":" + c.getSecond());
          scannerBase.fetchColumn(c.getFirst(), c.getSecond());
        } else {
          log.debug("Fetching column family " + c.getFirst());
          scannerBase.fetchColumnFamily(c.getFirst());
        }
      }

      SamplerConfiguration samplerConfig = baseSplit.getSamplerConfiguration();
      if (null == samplerConfig) {
        samplerConfig = tableConfig.getSamplerConfiguration();
      }

      if (samplerConfig != null) {
        scannerBase.setSamplerConfiguration(samplerConfig);
      }

      scannerIterator = scannerBase.iterator();
      numKeysRead = 0;
    }