Пример #1
0
    private String runResolveCommand(List<String> args) {
      int loopCount = 0;
      if (args.size() == 0) {
        return null;
      }
      StringBuffer allOutput = new StringBuffer();
      int numProcessed = 0;
      if (maxArgs < MIN_ALLOWABLE_ARGS) {
        LOG.warn(
            "Invalid value "
                + Integer.toString(maxArgs)
                + " for "
                + SCRIPT_ARG_COUNT_KEY
                + "; must be >= "
                + Integer.toString(MIN_ALLOWABLE_ARGS));
        return null;
      }

      while (numProcessed != args.size()) {
        int start = maxArgs * loopCount;
        List<String> cmdList = new ArrayList<String>();
        cmdList.add(scriptName);
        for (numProcessed = start;
            numProcessed < (start + maxArgs) && numProcessed < args.size();
            numProcessed++) {
          cmdList.add(args.get(numProcessed));
        }
        File dir = null;
        String userDir;
        if ((userDir = System.getProperty("user.dir")) != null) {
          dir = new File(userDir);
        }
        ShellCommandExecutor s = new ShellCommandExecutor(cmdList.toArray(new String[0]), dir);
        try {
          s.execute();
          allOutput.append(s.getOutput() + " ");
        } catch (Exception e) {
          LOG.warn(StringUtils.stringifyException(e));
          return null;
        }
        loopCount++;
      }
      return allOutput.toString();
    }
 /**
  * list subfiles
  *
  * @param hdfsPath !null path probably exists
  * @return !null list of enclused file names - emptu if file of not exists
  */
 @Override
 public String[] ls(final String hdfsPath) {
   if (isRunningAsUser()) {
     return super.ls(hdfsPath);
   }
   final FileSystem fs = getDFS();
   try {
     FileStatus[] statuses = fs.listStatus(new Path(hdfsPath));
     if (statuses == null) return new String[0];
     List<String> holder = new ArrayList<String>();
     for (int i = 0; i < statuses.length; i++) {
       FileStatus statuse = statuses[i];
       String s = statuse.getPath().getName();
       holder.add(s);
     }
     String[] ret = new String[holder.size()];
     holder.toArray(ret);
     return ret;
   } catch (IOException e) {
     throw new RuntimeException(e);
   }
 }
Пример #3
0
  /**
   * Search for pages matching a query, eliminating excessive hits with matching values for a named
   * field. Hits after the first <code>maxHitsPerDup</code> are removed from results. The remaining
   * hits have {@link Hit#moreFromDupExcluded()} set.
   *
   * <p>If maxHitsPerDup is zero then all hits are returned.
   *
   * @param query query
   * @param numHits number of requested hits
   * @param maxHitsPerDup the maximum hits returned with matching values, or zero
   * @param dedupField field name to check for duplicates
   * @param sortField Field to sort on (or null if no sorting).
   * @param reverse True if we are to reverse sort by <code>sortField</code>.
   * @return Hits the matching hits
   * @throws IOException
   */
  public Hits search(
      Query query,
      int numHits,
      int maxHitsPerDup,
      String dedupField,
      String sortField,
      boolean reverse)
      throws IOException {
    if (maxHitsPerDup <= 0) // disable dup checking
    return search(query, numHits, dedupField, sortField, reverse);

    final float rawHitsFactor = this.conf.getFloat("searcher.hostgrouping.rawhits.factor", 2.0f);
    int numHitsRaw = (int) (numHits * rawHitsFactor);
    if (LOG.isInfoEnabled()) {
      LOG.info("searching for " + numHitsRaw + " raw hits");
    }
    Hits hits = searchBean.search(query, numHitsRaw, dedupField, sortField, reverse);
    final long total = hits.getTotal();
    final Map<String, DupHits> dupToHits = new HashMap<String, DupHits>();
    final List<Hit> resultList = new ArrayList<Hit>();
    final Set<Hit> seen = new HashSet<Hit>();
    final List<String> excludedValues = new ArrayList<String>();
    boolean totalIsExact = true;
    for (int rawHitNum = 0; rawHitNum < hits.getTotal(); rawHitNum++) {
      // get the next raw hit
      if (rawHitNum >= hits.getLength()) {
        // optimize query by prohibiting more matches on some excluded values
        final Query optQuery = (Query) query.clone();
        for (int i = 0; i < excludedValues.size(); i++) {
          if (i == MAX_PROHIBITED_TERMS) break;
          optQuery.addProhibitedTerm(excludedValues.get(i), dedupField);
        }
        numHitsRaw = (int) (numHitsRaw * rawHitsFactor);
        if (LOG.isInfoEnabled()) {
          LOG.info("re-searching for " + numHitsRaw + " raw hits, query: " + optQuery);
        }
        hits = searchBean.search(optQuery, numHitsRaw, dedupField, sortField, reverse);
        if (LOG.isInfoEnabled()) {
          LOG.info("found " + hits.getTotal() + " raw hits");
        }
        rawHitNum = -1;
        continue;
      }

      final Hit hit = hits.getHit(rawHitNum);
      if (seen.contains(hit)) continue;
      seen.add(hit);

      // get dup hits for its value
      final String value = hit.getDedupValue();
      DupHits dupHits = dupToHits.get(value);
      if (dupHits == null) dupToHits.put(value, dupHits = new DupHits());

      // does this hit exceed maxHitsPerDup?
      if (dupHits.size() == maxHitsPerDup) { // yes -- ignore the hit
        if (!dupHits.maxSizeExceeded) {

          // mark prior hits with moreFromDupExcluded
          for (int i = 0; i < dupHits.size(); i++) {
            dupHits.get(i).setMoreFromDupExcluded(true);
          }
          dupHits.maxSizeExceeded = true;

          excludedValues.add(value); // exclude dup
        }
        totalIsExact = false;
      } else { // no -- collect the hit
        resultList.add(hit);
        dupHits.add(hit);

        // are we done?
        // we need to find one more than asked for, so that we can tell if
        // there are more hits to be shown
        if (resultList.size() > numHits) break;
      }
    }

    final Hits results = new Hits(total, resultList.toArray(new Hit[resultList.size()]));
    results.setTotalIsExact(totalIsExact);
    return results;
  }
Пример #4
0
  public int run(String[] args) throws Exception {
    // printUsage();
    /*
     * SETUP
     */
    Configuration argConf = getConf();
    Hashtable<String, String> confArg = new Hashtable<String, String>();
    setup(confArg, argConf);
    Date currentTime = new Date();
    Date endDate = new Date(new Long(confArg.get("timestamp_stop")));
    Boolean full_run = confArg.get("intermediate").matches("(?i).*true.*");
    Boolean quick_add = confArg.get("quick_add").matches("(?i).*true.*");
    logger.info("Running GeStore");

    // ZooKeeper setup
    Configuration config = HBaseConfiguration.create();
    zkWatcher = new ZooKeeperWatcher(config, "Testing", new HBaseAdmin(config));
    zkInstance =
        new ZooKeeper(
            ZKConfig.getZKQuorumServersString(config),
            config.getInt("zookeeper.session.timeout", -1),
            zkWatcher);

    if (!confArg.get("task_id").isEmpty()) {
      confArg.put("temp_path", confArg.get("temp_path") + confArg.get("task_id"));
    }

    String lockRequest = confArg.get("file_id");
    if (!confArg.get("run_id").isEmpty())
      lockRequest = lockRequest + "_" + confArg.get("run_id") + "_";
    if (!confArg.get("task_id").isEmpty())
      lockRequest = lockRequest + "_" + confArg.get("task_id") + "_";

    // Get type of movement
    toFrom type_move = checkArgs(confArg);
    if (type_move == toFrom.LOCAL2REMOTE && !confArg.get("format").equals("unknown")) {
      List<String> arguments = new ArrayList<String>();
      arguments.add("-Dinput=" + confArg.get("local_path"));
      arguments.add("-Dtable=" + confArg.get("file_id"));
      arguments.add("-Dtimestamp=" + confArg.get("timestamp_stop"));
      arguments.add("-Dtype=" + confArg.get("format"));
      arguments.add("-Dtarget_dir=" + confArg.get("base_path") + "_" + confArg.get("file_id"));
      arguments.add("-Dtemp_hdfs_path=" + confArg.get("temp_path"));
      arguments.add("-Drun_id=" + confArg.get("run_id"));
      if (!confArg.get("run_id").isEmpty()) arguments.add("-Drun_id=" + confArg.get("run_id"));
      if (!confArg.get("task_id").isEmpty()) arguments.add("-Dtask_id=" + confArg.get("task_id"));
      if (quick_add) arguments.add("-Dquick_add=" + confArg.get("quick_add"));
      String lockName = lock(lockRequest);
      String[] argumentString = arguments.toArray(new String[arguments.size()]);
      adddb.main(argumentString);
      unlock(lockName);
      System.exit(0);
    }

    // Database registration

    dbutil db_util = new dbutil(config);
    db_util.register_database(confArg.get("db_name_files"), true);
    db_util.register_database(confArg.get("db_name_runs"), true);
    db_util.register_database(confArg.get("db_name_updates"), true);
    FileSystem hdfs = FileSystem.get(config);
    FileSystem localFS = FileSystem.getLocal(config);

    // Get source type
    confArg.put("source", getSource(db_util, confArg.get("db_name_files"), confArg.get("file_id")));
    confArg.put(
        "database", isDatabase(db_util, confArg.get("db_name_files"), confArg.get("file_id")));
    if (!confArg.get("source").equals("local")
        && type_move == toFrom.REMOTE2LOCAL
        && !confArg.get("timestamp_stop").equals(Integer.toString(Integer.MAX_VALUE))) {
      confArg.put("timestamp_stop", Long.toString(latestVersion(confArg, db_util)));
    }

    /*
     * Get previous timestamp
     */
    Get run_id_get = new Get(confArg.get("run_id").getBytes());
    Result run_get = db_util.doGet(confArg.get("db_name_runs"), run_id_get);
    KeyValue run_file_prev =
        run_get.getColumnLatest(
            "d".getBytes(), (confArg.get("file_id") + "_db_timestamp").getBytes());
    String last_timestamp = new String("0");
    if (null != run_file_prev && !confArg.get("source").equals("local")) {
      long last_timestamp_real = run_file_prev.getTimestamp();
      Long current_timestamp = new Long(confArg.get("timestamp_real"));
      if ((current_timestamp - last_timestamp_real) > 36000) {
        last_timestamp = new String(run_file_prev.getValue());
        Integer lastTimestamp = new Integer(last_timestamp);
        lastTimestamp += 1;
        last_timestamp = lastTimestamp.toString();
        logger.info("Last timestamp: " + last_timestamp + " End data: " + endDate);
        Date last_run = new Date(run_file_prev.getTimestamp());
        if (last_run.before(endDate) && !full_run) {
          confArg.put("timestamp_start", last_timestamp);
        }
      }
    }

    Integer tse = new Integer(confArg.get("timestamp_stop"));
    Integer tss = new Integer(confArg.get("timestamp_start"));
    if (tss > tse) {
      logger.info("No new version of requested file.");
      return 0;
    }

    /*
     * Generate file
     */

    String lockName = lock(lockRequest);

    Get file_id_get = new Get(confArg.get("file_id").getBytes());
    Result file_get = db_util.doGet(confArg.get("db_name_files"), file_id_get);
    if (!file_get.isEmpty()) {
      boolean found =
          hasFile(
              db_util,
              hdfs,
              confArg.get("db_name_files"),
              confArg.get("file_id"),
              getFullPath(confArg));
      if (confArg.get("source").equals("fullfile")) {
        found = false;
      }
      String filenames_put =
          getFileNames(
              db_util, confArg.get("db_name_files"), confArg.get("file_id"), getFullPath(confArg));
      // Filename not found in file database
      if (!found && type_move == toFrom.REMOTE2LOCAL) {
        if (!confArg.get("source").equals("local")) {
          // Generate intermediate file
          if (getFile(hdfs, confArg, db_util) == null) {
            unlock(lockName);
            return 1;
          }
          // Put generated file into file database
          if (!confArg.get("format").equals("fullfile")) {
            putFileEntry(
                db_util,
                hdfs,
                confArg.get("db_name_files"),
                confArg.get("file_id"),
                confArg.get("full_file_name"),
                confArg.get("source"));
          }
        } else {
          logger.warn("Remote file not found, and cannot be generated! File: " + confArg);
          unlock(lockName);
          return 1;
        }
      }
    } else {
      if (type_move == toFrom.REMOTE2LOCAL) {
        logger.warn("Remote file not found, and cannot be generated.");
        unlock(lockName);
        return 1;
      }
    }

    /*
     * Copy file
     * Update tables
     */

    if (type_move == toFrom.LOCAL2REMOTE) {
      if (!confArg.get("format").equals("fullfile")) {
        putFileEntry(
            db_util,
            hdfs,
            confArg.get("db_name_files"),
            confArg.get("file_id"),
            getFullPath(confArg),
            confArg.get("source"));
      }
      putRunEntry(
          db_util,
          confArg.get("db_name_runs"),
          confArg.get("run_id"),
          confArg.get("file_id"),
          confArg.get("type"),
          confArg.get("timestamp_real"),
          confArg.get("timestamp_stop"),
          getFullPath(confArg),
          confArg.get("delimiter"));
      hdfs.copyFromLocalFile(new Path(confArg.get("local_path")), new Path(getFullPath(confArg)));
    } else if (type_move == toFrom.REMOTE2LOCAL) {
      FileStatus[] files = hdfs.globStatus(new Path(getFullPath(confArg) + "*"));
      putRunEntry(
          db_util,
          confArg.get("db_name_runs"),
          confArg.get("run_id"),
          confArg.get("file_id"),
          confArg.get("type"),
          confArg.get("timestamp_real"),
          confArg.get("timestamp_stop"),
          getFullPath(confArg),
          confArg.get("delimiter"));
      unlock(lockName);
      for (FileStatus file : files) {
        Path cur_file = file.getPath();
        Path cur_local_path =
            new Path(new String(confArg.get("local_path") + confArg.get("file_id")));
        String suffix = getSuffix(getFileName(confArg), cur_file.getName());
        if (suffix.length() > 0) {
          cur_local_path = cur_local_path.suffix(new String("." + suffix));
        }
        if (confArg.get("copy").equals("true")) {
          String crc = hdfs.getFileChecksum(cur_file).toString();
          if (checksumLocalTest(cur_local_path, crc)) {
            continue;
          } else {
            hdfs.copyToLocalFile(cur_file, cur_local_path);
            writeChecksum(cur_local_path, crc);
          }
        } else {
          System.out.println(cur_local_path + "\t" + cur_file);
        }
      }
    }
    unlock(lockName);
    return 0;
  }