ActiveFile(File f, List<Thread> list) {
   file = f;
   if (list != null) {
     threads.addAll(list);
   }
   threads.add(Thread.currentThread());
 }
  void checkFormat(Job job) throws Exception {
    TaskAttemptContext attemptContext =
        new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID("123", 0, false, 1, 2));

    MyClassMessagePackBase64LineInputFormat format = new MyClassMessagePackBase64LineInputFormat();
    FileInputFormat.setInputPaths(job, workDir);

    List<InputSplit> splits = format.getSplits(job);
    for (int j = 0; j < splits.size(); j++) {
      RecordReader<LongWritable, MyClassWritable> reader =
          format.createRecordReader(splits.get(j), attemptContext);
      reader.initialize(splits.get(j), attemptContext);

      int count = 0;
      try {
        while (reader.nextKeyValue()) {
          LongWritable key = reader.getCurrentKey();
          MyClassWritable val = reader.getCurrentValue();
          MyClass mc = val.get();
          assertEquals(mc.v, count);
          assertEquals(mc.s, Integer.toString(count));
          count++;
        }
      } finally {
        reader.close();
      }
    }
  }
    protected static double Distance(List<Double> p1, List<Double> p2) {
      double sumOfSquaredDifferences = 0.0;

      for (int i = 0; i < p1.size(); i++) {
        sumOfSquaredDifferences += Math.pow(p1.get(i) - p2.get(i), 2.0);
      }

      return Math.pow(sumOfSquaredDifferences, 0.5);
    }
  public static List<Double> GetPoint(String s) {
    StringTokenizer tokenizer = new StringTokenizer(s);

    List<Double> p = new ArrayList<Double>(58);

    while (tokenizer.hasMoreTokens()) {
      p.add(Double.parseDouble(tokenizer.nextToken()));
    }

    return p;
  }
 public static List<String> tokenizeString(Analyzer analyzer, String string) {
   List<String> result = new ArrayList<String>();
   try {
     TokenStream stream = analyzer.tokenStream(null, new StringReader(string));
     stream.reset();
     while (stream.incrementToken()) {
       result.add(stream.getAttribute(CharTermAttribute.class).toString());
     }
   } catch (IOException e) {
     // not thrown b/c we're using a string reader...
     throw new RuntimeException(e);
   }
   return result;
 }
Exemple #6
0
 public static List<InetSocketAddress> readAddresses(Path path, Configuration conf)
     throws IOException {
   final List<InetSocketAddress> addrs = new ArrayList<InetSocketAddress>();
   for (final String line : readConfig(path, conf)) {
     final StringTokenizer tokens = new StringTokenizer(line);
     if (tokens.hasMoreTokens()) {
       final String host = tokens.nextToken();
       if (tokens.hasMoreTokens()) {
         final String port = tokens.nextToken();
         addrs.add(new InetSocketAddress(host, Integer.parseInt(port)));
       }
     }
   }
   return addrs;
 }
  public void configure(JobConf conf) {
    numberOfCenters = Integer.valueOf(conf.get("numberOfCenters"));
    centersDirectory = conf.get("centersReadDirectory");

    try {
      Configuration c = new Configuration();
      FileSystem fs = FileSystem.get(c);

      for (int index = 0; index < numberOfCenters; ++index) {
        SequenceFile.Reader reader =
            new SequenceFile.Reader(fs, new Path(centersDirectory + "/centers/" + index), c);

        LongWritable key = new LongWritable();
        Point value = new Point();

        reader.next(key, value);

        Point center = (Point) value;

        centers.add(center);

        reader.close();
      }
    } catch (IOException e) {
      // do nothing
      // I hope this doesn't happen
      System.out.println("well, damn.");
      e.printStackTrace();
    }
  }
  /**
   * Generate the list of files and make them into FileSplits. This needs to be copied to insert a
   * filter on acceptable data
   */
  @Override
  public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);
    long desiredMappers =
        job.getConfiguration().getLong("org.systemsbiology.jxtandem.DesiredXMLInputMappers", 0);

    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> fileStatuses = listStatus(job);
    boolean forceNumberMappers = fileStatuses.size() == 1;
    for (FileStatus file : fileStatuses) {
      Path path = file.getPath();
      if (!isPathAcceptable(path)) // filter acceptable data
      continue;
      FileSystem fs = path.getFileSystem(job.getConfiguration());
      long length = file.getLen();
      BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
      if ((length != 0) && isSplitable(job, path)) {
        long blockSize = file.getBlockSize();
        // use desired mappers to force more splits
        if (forceNumberMappers && desiredMappers > 0)
          maxSize = Math.min(maxSize, (length / desiredMappers));

        long splitSize = computeSplitSize(blockSize, minSize, maxSize);

        long bytesRemaining = length;
        while (withinSlop(splitSize, bytesRemaining)) {
          int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
          splits.add(
              new FileSplit(
                  path, length - bytesRemaining, splitSize, blkLocations[blkIndex].getHosts()));
          bytesRemaining -= splitSize;
        }

        if (bytesRemaining != 0) {
          splits.add(
              new FileSplit(
                  path,
                  length - bytesRemaining,
                  bytesRemaining,
                  blkLocations[blkLocations.length - 1].getHosts()));
        }
      } else if (length != 0) {
        splits.add(new FileSplit(path, 0, length, blkLocations[0].getHosts()));
      } else {
        // Create empty hosts array for zero length files
        splits.add(new FileSplit(path, 0, length, new String[0]));
      }
    }
    System.out.println("Total # of splits: " + splits.size());
    //     LOG.debug("Total # of splits: " + splits.size());
    return splits;
  }
    public List<String> resolve(List<String> names) {
      List<String> m = new ArrayList<String>(names.size());

      if (names.isEmpty()) {
        return m;
      }

      if (scriptName == null) {
        for (int i = 0; i < names.size(); i++) {
          m.add(NetworkTopology.DEFAULT_RACK);
        }
        return m;
      }

      String output = runResolveCommand(names);
      if (output != null) {
        StringTokenizer allSwitchInfo = new StringTokenizer(output);
        while (allSwitchInfo.hasMoreTokens()) {
          String switchInfo = allSwitchInfo.nextToken();
          m.add(switchInfo);
        }

        if (m.size() != names.size()) {
          // invalid number of entries returned by the script
          LOG.warn(
              "Script "
                  + scriptName
                  + " returned "
                  + Integer.toString(m.size())
                  + " values when "
                  + Integer.toString(names.size())
                  + " were expected.");
          return null;
        }
      } else {
        // an error occurred. return null to signify this.
        // (exn was already logged in runResolveCommand)
        return null;
      }

      return m;
    }
  public void map(
      LongWritable key, Point value, OutputCollector<LongWritable, Point> output, Reporter reporter)
      throws IOException {
    double min = value.sumOfSquares(centers.get(0));
    int best = 0;

    for (int index = 1; index < numberOfCenters; ++index) {
      double current = value.sumOfSquares(centers.get(index));

      if (current < min) {
        min = current;
        best = index;
      }
    }

    reporter.incrCounter("NUMBER", "NODES", 1);
    reporter.incrCounter("CENTER", "" + best, 1);

    output.collect(new LongWritable(best), value);
  }
 private static void deleteCache(Configuration conf, MRAsyncDiskService asyncDiskService)
     throws IOException {
   List<CacheStatus> deleteSet = new LinkedList<CacheStatus>();
   // try deleting cache Status with refcount of zero
   synchronized (cachedArchives) {
     for (Iterator<String> it = cachedArchives.keySet().iterator(); it.hasNext(); ) {
       String cacheId = (String) it.next();
       CacheStatus lcacheStatus = cachedArchives.get(cacheId);
       if (lcacheStatus.refcount == 0) {
         // delete this cache entry from the global list
         // and mark the localized file for deletion
         deleteSet.add(lcacheStatus);
         it.remove();
       }
     }
   }
   // do the deletion asynchronously, after releasing the global lock
   Thread cacheFileCleaner =
       new Thread(new CacheFileCleanTask(asyncDiskService, FileSystem.getLocal(conf), deleteSet));
   cacheFileCleaner.start();
 }
 /**
  * list subfiles
  *
  * @param hdfsPath !null path probably exists
  * @return !null list of enclused file names - emptu if file of not exists
  */
 @Override
 public String[] ls(final String hdfsPath) {
   if (isRunningAsUser()) {
     return super.ls(hdfsPath);
   }
   final FileSystem fs = getDFS();
   try {
     FileStatus[] statuses = fs.listStatus(new Path(hdfsPath));
     if (statuses == null) return new String[0];
     List<String> holder = new ArrayList<String>();
     for (int i = 0; i < statuses.length; i++) {
       FileStatus statuse = statuses[i];
       String s = statuse.getPath().getName();
       holder.add(s);
     }
     String[] ret = new String[holder.size()];
     holder.toArray(ret);
     return ret;
   } catch (IOException e) {
     throw new RuntimeException(e);
   }
 }
Exemple #13
0
    protected void setup(Context context) throws IOException, InterruptedException {
      FileSystem fs = FileSystem.get(context.getConfiguration());

      Path cFile = new Path(context.getConfiguration().get("CFILE"));
      DataInputStream d = new DataInputStream(fs.open(cFile));
      BufferedReader reader = new BufferedReader(new InputStreamReader(d));

      String line;
      while ((line = reader.readLine()) != null) {
        StringTokenizer tokenizer = new StringTokenizer(line.toString());

        if (tokenizer.hasMoreTokens()) {
          List<Double> centroid = new ArrayList<Double>(58);

          while (tokenizer.hasMoreTokens()) {
            centroid.add(Double.parseDouble(tokenizer.nextToken()));
          }

          centroids.add(centroid);
        }
      }

      k = centroids.size();
    }
    private String runResolveCommand(List<String> args) {
      int loopCount = 0;
      if (args.size() == 0) {
        return null;
      }
      StringBuffer allOutput = new StringBuffer();
      int numProcessed = 0;
      if (maxArgs < MIN_ALLOWABLE_ARGS) {
        LOG.warn(
            "Invalid value "
                + Integer.toString(maxArgs)
                + " for "
                + SCRIPT_ARG_COUNT_KEY
                + "; must be >= "
                + Integer.toString(MIN_ALLOWABLE_ARGS));
        return null;
      }

      while (numProcessed != args.size()) {
        int start = maxArgs * loopCount;
        List<String> cmdList = new ArrayList<String>();
        cmdList.add(scriptName);
        for (numProcessed = start;
            numProcessed < (start + maxArgs) && numProcessed < args.size();
            numProcessed++) {
          cmdList.add(args.get(numProcessed));
        }
        File dir = null;
        String userDir;
        if ((userDir = System.getProperty("user.dir")) != null) {
          dir = new File(userDir);
        }
        ShellCommandExecutor s = new ShellCommandExecutor(cmdList.toArray(new String[0]), dir);
        try {
          s.execute();
          allOutput.append(s.getOutput() + " ");
        } catch (Exception e) {
          LOG.warn(StringUtils.stringifyException(e));
          return null;
        }
        loopCount++;
      }
      return allOutput.toString();
    }
Exemple #15
0
    public void reduce(IntWritable key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
      List<Double> newCentroid = null;
      int numPoints = 0;

      for (Text value : values) {
        ++numPoints;

        List<Double> p = GetPoint(value.toString());

        if (newCentroid == null) {
          // initialize the new centroid to the first element
          newCentroid = new ArrayList<Double>(p);
        } else {
          for (int i = 0; i < newCentroid.size(); i++) {
            newCentroid.set(i, newCentroid.get(i) + p.get(i));
          }
        }
      }

      // now the newCentroid contains the sum of all the points
      // so to get the average of all the points, we need to
      // divide each entry by the total number of points

      for (int i = 0; i < newCentroid.size(); i++) {
        newCentroid.set(i, newCentroid.get(i) / (double) numPoints);
      }

      // now create a string containing all the new centroid's coordinates

      String s = null;
      for (Double d : newCentroid) {
        if (s == null) {
          s = d.toString();
        } else {
          s += " " + d.toString();
        }
      }

      newCentroids.add(s);

      if (newCentroids.size() == 10) {
        WriteNewCentroids(context);
      }

      // output the centroid ID and the centroid data
      context.write(key, new Text(s));
    }
Exemple #16
0
  /**
   * Search for pages matching a query, eliminating excessive hits with matching values for a named
   * field. Hits after the first <code>maxHitsPerDup</code> are removed from results. The remaining
   * hits have {@link Hit#moreFromDupExcluded()} set.
   *
   * <p>If maxHitsPerDup is zero then all hits are returned.
   *
   * @param query query
   * @param numHits number of requested hits
   * @param maxHitsPerDup the maximum hits returned with matching values, or zero
   * @param dedupField field name to check for duplicates
   * @param sortField Field to sort on (or null if no sorting).
   * @param reverse True if we are to reverse sort by <code>sortField</code>.
   * @return Hits the matching hits
   * @throws IOException
   */
  public Hits search(
      Query query,
      int numHits,
      int maxHitsPerDup,
      String dedupField,
      String sortField,
      boolean reverse)
      throws IOException {
    if (maxHitsPerDup <= 0) // disable dup checking
    return search(query, numHits, dedupField, sortField, reverse);

    final float rawHitsFactor = this.conf.getFloat("searcher.hostgrouping.rawhits.factor", 2.0f);
    int numHitsRaw = (int) (numHits * rawHitsFactor);
    if (LOG.isInfoEnabled()) {
      LOG.info("searching for " + numHitsRaw + " raw hits");
    }
    Hits hits = searchBean.search(query, numHitsRaw, dedupField, sortField, reverse);
    final long total = hits.getTotal();
    final Map<String, DupHits> dupToHits = new HashMap<String, DupHits>();
    final List<Hit> resultList = new ArrayList<Hit>();
    final Set<Hit> seen = new HashSet<Hit>();
    final List<String> excludedValues = new ArrayList<String>();
    boolean totalIsExact = true;
    for (int rawHitNum = 0; rawHitNum < hits.getTotal(); rawHitNum++) {
      // get the next raw hit
      if (rawHitNum >= hits.getLength()) {
        // optimize query by prohibiting more matches on some excluded values
        final Query optQuery = (Query) query.clone();
        for (int i = 0; i < excludedValues.size(); i++) {
          if (i == MAX_PROHIBITED_TERMS) break;
          optQuery.addProhibitedTerm(excludedValues.get(i), dedupField);
        }
        numHitsRaw = (int) (numHitsRaw * rawHitsFactor);
        if (LOG.isInfoEnabled()) {
          LOG.info("re-searching for " + numHitsRaw + " raw hits, query: " + optQuery);
        }
        hits = searchBean.search(optQuery, numHitsRaw, dedupField, sortField, reverse);
        if (LOG.isInfoEnabled()) {
          LOG.info("found " + hits.getTotal() + " raw hits");
        }
        rawHitNum = -1;
        continue;
      }

      final Hit hit = hits.getHit(rawHitNum);
      if (seen.contains(hit)) continue;
      seen.add(hit);

      // get dup hits for its value
      final String value = hit.getDedupValue();
      DupHits dupHits = dupToHits.get(value);
      if (dupHits == null) dupToHits.put(value, dupHits = new DupHits());

      // does this hit exceed maxHitsPerDup?
      if (dupHits.size() == maxHitsPerDup) { // yes -- ignore the hit
        if (!dupHits.maxSizeExceeded) {

          // mark prior hits with moreFromDupExcluded
          for (int i = 0; i < dupHits.size(); i++) {
            dupHits.get(i).setMoreFromDupExcluded(true);
          }
          dupHits.maxSizeExceeded = true;

          excludedValues.add(value); // exclude dup
        }
        totalIsExact = false;
      } else { // no -- collect the hit
        resultList.add(hit);
        dupHits.add(hit);

        // are we done?
        // we need to find one more than asked for, so that we can tell if
        // there are more hits to be shown
        if (resultList.size() > numHits) break;
      }
    }

    final Hits results = new Hits(total, resultList.toArray(new Hit[resultList.size()]));
    results.setTotalIsExact(totalIsExact);
    return results;
  }
  public void generateFileDetails(JspWriter out, HttpServletRequest req, Configuration conf)
      throws IOException, InterruptedException {

    int chunkSizeToView = 0;
    long startOffset = 0;
    int datanodePort;

    String blockIdStr = null;
    long currBlockId = 0;
    blockIdStr = req.getParameter("blockId");
    if (blockIdStr == null) {
      out.print("Invalid input (blockId absent)");
      return;
    }
    currBlockId = Long.parseLong(blockIdStr);

    String datanodePortStr = req.getParameter("datanodePort");
    if (datanodePortStr == null) {
      out.print("Invalid input (datanodePort absent)");
      return;
    }
    datanodePort = Integer.parseInt(datanodePortStr);

    String namenodeInfoPortStr = req.getParameter("namenodeInfoPort");
    int namenodeInfoPort = -1;
    if (namenodeInfoPortStr != null) namenodeInfoPort = Integer.parseInt(namenodeInfoPortStr);

    String chunkSizeToViewStr = req.getParameter("chunkSizeToView");
    if (chunkSizeToViewStr != null && Integer.parseInt(chunkSizeToViewStr) > 0) {
      chunkSizeToView = Integer.parseInt(chunkSizeToViewStr);
    } else {
      chunkSizeToView = JspHelper.getDefaultChunkSize(conf);
    }

    String startOffsetStr = req.getParameter("startOffset");
    if (startOffsetStr == null || Long.parseLong(startOffsetStr) < 0) startOffset = 0;
    else startOffset = Long.parseLong(startOffsetStr);

    String filename = HtmlQuoting.unquoteHtmlChars(req.getParameter("filename"));
    if (filename == null || filename.length() == 0) {
      out.print("Invalid input");
      return;
    }

    String blockSizeStr = req.getParameter("blockSize");
    long blockSize = 0;
    if (blockSizeStr == null || blockSizeStr.length() == 0) {
      out.print("Invalid input");
      return;
    }
    blockSize = Long.parseLong(blockSizeStr);

    String tokenString = req.getParameter(JspHelper.DELEGATION_PARAMETER_NAME);
    UserGroupInformation ugi = JspHelper.getUGI(req, conf);
    DFSClient dfs = JspHelper.getDFSClient(ugi, jspHelper.nameNodeAddr, conf);
    List<LocatedBlock> blocks =
        dfs.namenode.getBlockLocations(filename, 0, Long.MAX_VALUE).getLocatedBlocks();
    // Add the various links for looking at the file contents
    // URL for downloading the full file
    String downloadUrl =
        "http://"
            + req.getServerName()
            + ":"
            + +req.getServerPort()
            + "/streamFile"
            + URLEncoder.encode(filename, "UTF-8")
            + "?"
            + JspHelper.DELEGATION_PARAMETER_NAME
            + "="
            + tokenString;
    out.print("<a name=\"viewOptions\"></a>");
    out.print("<a href=\"" + downloadUrl + "\">Download this file</a><br>");

    DatanodeInfo chosenNode;
    // URL for TAIL
    LocatedBlock lastBlk = blocks.get(blocks.size() - 1);
    long blockId = lastBlk.getBlock().getBlockId();
    try {
      chosenNode = jspHelper.bestNode(lastBlk);
    } catch (IOException e) {
      out.print(e.toString());
      dfs.close();
      return;
    }
    String fqdn = InetAddress.getByName(chosenNode.getHost()).getCanonicalHostName();
    String tailUrl =
        "http://"
            + fqdn
            + ":"
            + chosenNode.getInfoPort()
            + "/tail.jsp?filename="
            + URLEncoder.encode(filename, "UTF-8")
            + "&namenodeInfoPort="
            + namenodeInfoPort
            + "&chunkSizeToView="
            + chunkSizeToView
            + "&referrer="
            + URLEncoder.encode(req.getRequestURL() + "?" + req.getQueryString(), "UTF-8")
            + JspHelper.getDelegationTokenUrlParam(tokenString);
    out.print("<a href=\"" + tailUrl + "\">Tail this file</a><br>");

    out.print("<form action=\"/browseBlock.jsp\" method=GET>");
    out.print("<b>Chunk size to view (in bytes, up to file's DFS block size): </b>");
    out.print("<input type=\"hidden\" name=\"blockId\" value=\"" + currBlockId + "\">");
    out.print("<input type=\"hidden\" name=\"blockSize\" value=\"" + blockSize + "\">");
    out.print("<input type=\"hidden\" name=\"startOffset\" value=\"" + startOffset + "\">");
    out.print("<input type=\"hidden\" name=\"filename\" value=\"" + filename + "\">");
    out.print("<input type=\"hidden\" name=\"datanodePort\" value=\"" + datanodePort + "\">");
    out.print(
        "<input type=\"hidden\" name=\"namenodeInfoPort\" value=\"" + namenodeInfoPort + "\">");
    out.print(
        "<input type=\"text\" name=\"chunkSizeToView\" value="
            + chunkSizeToView
            + " size=10 maxlength=10>");
    out.print("&nbsp;&nbsp;<input type=\"submit\" name=\"submit\" value=\"Refresh\">");
    out.print("</form>");
    out.print("<hr>");
    out.print("<a name=\"blockDetails\"></a>");
    out.print("<B>Total number of blocks: " + blocks.size() + "</B><br>");
    // generate a table and dump the info
    out.println("\n<table>");
    for (LocatedBlock cur : blocks) {
      out.print("<tr>");
      blockId = cur.getBlock().getBlockId();
      blockSize = cur.getBlock().getNumBytes();
      String blk = "blk_" + Long.toString(blockId);
      out.print("<td>" + Long.toString(blockId) + ":</td>");
      DatanodeInfo[] locs = cur.getLocations();
      for (int j = 0; j < locs.length; j++) {
        String datanodeAddr = locs[j].getName();
        datanodePort =
            Integer.parseInt(
                datanodeAddr.substring(datanodeAddr.indexOf(':') + 1, datanodeAddr.length()));
        fqdn = InetAddress.getByName(locs[j].getHost()).getCanonicalHostName();
        String blockUrl =
            "http://"
                + fqdn
                + ":"
                + locs[j].getInfoPort()
                + "/browseBlock.jsp?blockId="
                + Long.toString(blockId)
                + "&blockSize="
                + blockSize
                + "&filename="
                + URLEncoder.encode(filename, "UTF-8")
                + "&datanodePort="
                + datanodePort
                + "&genstamp="
                + cur.getBlock().getGenerationStamp()
                + "&namenodeInfoPort="
                + namenodeInfoPort
                + "&chunkSizeToView="
                + chunkSizeToView;
        out.print(
            "<td>&nbsp</td>" + "<td><a href=\"" + blockUrl + "\">" + datanodeAddr + "</a></td>");
      }
      out.println("</tr>");
    }
    out.println("</table>");
    out.print("<hr>");
    String namenodeHost = jspHelper.nameNodeAddr.getHostName();
    out.print(
        "<br><a href=\"http://"
            + InetAddress.getByName(namenodeHost).getCanonicalHostName()
            + ":"
            + namenodeInfoPort
            + "/dfshealth.jsp\">Go back to DFS home</a>");
    dfs.close();
  }
  public void generateFileChunks(JspWriter out, HttpServletRequest req, Configuration conf)
      throws IOException, InterruptedException {
    long startOffset = 0;
    int datanodePort = 0;
    int chunkSizeToView = 0;

    String namenodeInfoPortStr = req.getParameter("namenodeInfoPort");
    int namenodeInfoPort = -1;
    if (namenodeInfoPortStr != null) namenodeInfoPort = Integer.parseInt(namenodeInfoPortStr);

    String filename = HtmlQuoting.unquoteHtmlChars(req.getParameter("filename"));
    if (filename == null) {
      out.print("Invalid input (filename absent)");
      return;
    }

    String blockIdStr = null;
    long blockId = 0;
    blockIdStr = req.getParameter("blockId");
    if (blockIdStr == null) {
      out.print("Invalid input (blockId absent)");
      return;
    }
    blockId = Long.parseLong(blockIdStr);

    String tokenString = req.getParameter(JspHelper.DELEGATION_PARAMETER_NAME);
    UserGroupInformation ugi = JspHelper.getUGI(req, conf);
    final DFSClient dfs = JspHelper.getDFSClient(ugi, jspHelper.nameNodeAddr, conf);

    Token<BlockTokenIdentifier> accessToken = BlockTokenSecretManager.DUMMY_TOKEN;
    if (conf.getBoolean(DFSConfigKeys.DFS_BLOCK_ACCESS_TOKEN_ENABLE_KEY, false)) {
      List<LocatedBlock> blks =
          dfs.namenode.getBlockLocations(filename, 0, Long.MAX_VALUE).getLocatedBlocks();
      if (blks == null || blks.size() == 0) {
        out.print("Can't locate file blocks");
        dfs.close();
        return;
      }
      for (int i = 0; i < blks.size(); i++) {
        if (blks.get(i).getBlock().getBlockId() == blockId) {
          accessToken = blks.get(i).getBlockToken();
          break;
        }
      }
    }

    String blockGenStamp = null;
    long genStamp = 0;
    blockGenStamp = req.getParameter("genstamp");
    if (blockGenStamp == null) {
      out.print("Invalid input (genstamp absent)");
      return;
    }
    genStamp = Long.parseLong(blockGenStamp);

    String blockSizeStr;
    long blockSize = 0;
    blockSizeStr = req.getParameter("blockSize");
    if (blockSizeStr == null) {
      out.print("Invalid input (blockSize absent)");
      return;
    }
    blockSize = Long.parseLong(blockSizeStr);

    String chunkSizeToViewStr = req.getParameter("chunkSizeToView");
    if (chunkSizeToViewStr != null && Integer.parseInt(chunkSizeToViewStr) > 0)
      chunkSizeToView = Integer.parseInt(chunkSizeToViewStr);
    else chunkSizeToView = JspHelper.getDefaultChunkSize(conf);

    String startOffsetStr = req.getParameter("startOffset");
    if (startOffsetStr == null || Long.parseLong(startOffsetStr) < 0) startOffset = 0;
    else startOffset = Long.parseLong(startOffsetStr);

    String datanodePortStr = req.getParameter("datanodePort");
    if (datanodePortStr == null) {
      out.print("Invalid input (datanodePort absent)");
      return;
    }
    datanodePort = Integer.parseInt(datanodePortStr);
    out.print("<h3>File: ");
    JspHelper.printPathWithLinks(
        HtmlQuoting.quoteHtmlChars(filename), out, namenodeInfoPort, tokenString);
    out.print("</h3><hr>");
    String parent = new File(filename).getParent();
    JspHelper.printGotoForm(out, namenodeInfoPort, tokenString, HtmlQuoting.quoteHtmlChars(parent));
    out.print("<hr>");
    out.print(
        "<a href=\"http://"
            + req.getServerName()
            + ":"
            + req.getServerPort()
            + "/browseDirectory.jsp?dir="
            + URLEncoder.encode(parent, "UTF-8")
            + "&namenodeInfoPort="
            + namenodeInfoPort
            + "\"><i>Go back to dir listing</i></a><br>");
    out.print("<a href=\"#viewOptions\">Advanced view/download options</a><br>");
    out.print("<hr>");

    // Determine the prev & next blocks
    long nextStartOffset = 0;
    long nextBlockSize = 0;
    String nextBlockIdStr = null;
    String nextGenStamp = null;
    String nextHost = req.getServerName();
    int nextPort = req.getServerPort();
    int nextDatanodePort = datanodePort;
    // determine data for the next link
    if (startOffset + chunkSizeToView >= blockSize) {
      // we have to go to the next block from this point onwards
      List<LocatedBlock> blocks =
          dfs.namenode.getBlockLocations(filename, 0, Long.MAX_VALUE).getLocatedBlocks();
      for (int i = 0; i < blocks.size(); i++) {
        if (blocks.get(i).getBlock().getBlockId() == blockId) {
          if (i != blocks.size() - 1) {
            LocatedBlock nextBlock = blocks.get(i + 1);
            nextBlockIdStr = Long.toString(nextBlock.getBlock().getBlockId());
            nextGenStamp = Long.toString(nextBlock.getBlock().getGenerationStamp());
            nextStartOffset = 0;
            nextBlockSize = nextBlock.getBlock().getNumBytes();
            DatanodeInfo d = jspHelper.bestNode(nextBlock);
            String datanodeAddr = d.getName();
            nextDatanodePort =
                Integer.parseInt(
                    datanodeAddr.substring(datanodeAddr.indexOf(':') + 1, datanodeAddr.length()));
            nextHost = InetAddress.getByName(d.getHost()).getCanonicalHostName();
            nextPort = d.getInfoPort();
          }
        }
      }
    } else {
      // we are in the same block
      nextBlockIdStr = blockIdStr;
      nextStartOffset = startOffset + chunkSizeToView;
      nextBlockSize = blockSize;
      nextGenStamp = blockGenStamp;
    }
    String nextUrl = null;
    if (nextBlockIdStr != null) {
      nextUrl =
          "http://"
              + nextHost
              + ":"
              + nextPort
              + "/browseBlock.jsp?blockId="
              + nextBlockIdStr
              + "&blockSize="
              + nextBlockSize
              + "&startOffset="
              + nextStartOffset
              + "&genstamp="
              + nextGenStamp
              + "&filename="
              + URLEncoder.encode(filename, "UTF-8")
              + "&chunkSizeToView="
              + chunkSizeToView
              + "&datanodePort="
              + nextDatanodePort
              + "&namenodeInfoPort="
              + namenodeInfoPort
              + JspHelper.getDelegationTokenUrlParam(tokenString);
      out.print("<a href=\"" + nextUrl + "\">View Next chunk</a>&nbsp;&nbsp;");
    }
    // determine data for the prev link
    String prevBlockIdStr = null;
    String prevGenStamp = null;
    long prevStartOffset = 0;
    long prevBlockSize = 0;
    String prevHost = req.getServerName();
    int prevPort = req.getServerPort();
    int prevDatanodePort = datanodePort;
    if (startOffset == 0) {
      List<LocatedBlock> blocks =
          dfs.namenode.getBlockLocations(filename, 0, Long.MAX_VALUE).getLocatedBlocks();
      for (int i = 0; i < blocks.size(); i++) {
        if (blocks.get(i).getBlock().getBlockId() == blockId) {
          if (i != 0) {
            LocatedBlock prevBlock = blocks.get(i - 1);
            prevBlockIdStr = Long.toString(prevBlock.getBlock().getBlockId());
            prevGenStamp = Long.toString(prevBlock.getBlock().getGenerationStamp());
            prevStartOffset = prevBlock.getBlock().getNumBytes() - chunkSizeToView;
            if (prevStartOffset < 0) prevStartOffset = 0;
            prevBlockSize = prevBlock.getBlock().getNumBytes();
            DatanodeInfo d = jspHelper.bestNode(prevBlock);
            String datanodeAddr = d.getName();
            prevDatanodePort =
                Integer.parseInt(
                    datanodeAddr.substring(datanodeAddr.indexOf(':') + 1, datanodeAddr.length()));
            prevHost = InetAddress.getByName(d.getHost()).getCanonicalHostName();
            prevPort = d.getInfoPort();
          }
        }
      }
    } else {
      // we are in the same block
      prevBlockIdStr = blockIdStr;
      prevStartOffset = startOffset - chunkSizeToView;
      if (prevStartOffset < 0) prevStartOffset = 0;
      prevBlockSize = blockSize;
      prevGenStamp = blockGenStamp;
    }

    String prevUrl = null;
    if (prevBlockIdStr != null) {
      prevUrl =
          "http://"
              + prevHost
              + ":"
              + prevPort
              + "/browseBlock.jsp?blockId="
              + prevBlockIdStr
              + "&blockSize="
              + prevBlockSize
              + "&startOffset="
              + prevStartOffset
              + "&filename="
              + URLEncoder.encode(filename, "UTF-8")
              + "&chunkSizeToView="
              + chunkSizeToView
              + "&genstamp="
              + prevGenStamp
              + "&datanodePort="
              + prevDatanodePort
              + "&namenodeInfoPort="
              + namenodeInfoPort
              + JspHelper.getDelegationTokenUrlParam(tokenString);
      out.print("<a href=\"" + prevUrl + "\">View Prev chunk</a>&nbsp;&nbsp;");
    }
    out.print("<hr>");
    out.print("<textarea cols=\"100\" rows=\"25\" wrap=\"virtual\" style=\"width:100%\" READONLY>");
    try {
      jspHelper.streamBlockInAscii(
          new InetSocketAddress(req.getServerName(), datanodePort),
          blockId,
          accessToken,
          genStamp,
          blockSize,
          startOffset,
          chunkSizeToView,
          out,
          conf);
    } catch (Exception e) {
      out.print(e);
    }
    out.print("</textarea>");
    dfs.close();
  }
  public static List<String> getText(BytesWritable value, Boolean tokenizep)
      throws InterruptedException {
    Session s = Session.getDefaultInstance(new Properties());
    InputStream is = new ByteArrayInputStream(value.getBytes());
    List<String> out = new ArrayList<String>();
    try {
      MimeMessage message = new MimeMessage(s, is);
      message.getAllHeaderLines();

      Analyzer standard_analyzer = new StandardAnalyzer(Version.LUCENE_43);
      Analyzer email_analyzer = new UAX29URLEmailAnalyzer(Version.LUCENE_43);

      Address[] fromAddrs = message.getFrom();
      String fromAddrstr = "";
      if (fromAddrs != null) {
        for (Address addr : fromAddrs) {
          fromAddrstr += (addr.toString() + " ");
        }
      }

      Address[] toAddrs = message.getAllRecipients();
      String toAddrstr = "";
      if (toAddrs != null) {
        for (Address addr : toAddrs) {
          toAddrstr += (addr.toString() + " ");
        }
      }

      String subject = message.getSubject();

      String body = "";
      try {
        Object content = message.getContent();
        // System.err.println(content.getContentType());
        if (content instanceof String) {
          body = (String) content;
        } else if (content instanceof Multipart) {
          Multipart mp = (Multipart) content;
          for (int i = 0; i < mp.getCount(); i++) {
            BodyPart bp = mp.getBodyPart(i);
            // System.err.println(bp.getContentType());
            Object c = bp.getContent();
            if (c instanceof String) {
              body = (String) c;
            }
          }
        }
        // people do really evil things with email, we're not sorting through it all now
      } catch (DecodingException e) {
        System.err.println("DecodingException");
      } catch (UnsupportedEncodingException e) {
        System.err.println("UnsuportedEncodingException");
      } catch (IOException e) {
        System.err.println("IOException");
      }

      if (tokenizep) {
        List<String> fromData = new ArrayList<String>();
        List<String> toData = new ArrayList<String>();
        List<String> subjectData = new ArrayList<String>();
        List<String> bodyData = new ArrayList<String>();

        if (fromAddrstr != null) {
          fromData = tokenizeString(email_analyzer, fromAddrstr);
        }
        if (toAddrstr != null) {
          toData = tokenizeString(email_analyzer, toAddrstr);
        }
        if (subject != null) {
          subjectData = tokenizeString(standard_analyzer, subject);
        }
        if (body != null) {
          bodyData = tokenizeString(standard_analyzer, body);
        }

        out.add("FROM ");
        out.addAll(fromData);

        out.add("TO ");
        out.addAll(toData);

        out.add("SUBJECT ");
        out.addAll(subjectData);

        out.add("BODY ");
        out.addAll(bodyData);
      } else {
        // if not tokenizep, return list with from and subject fields only
        out.add(fromAddrstr);
        out.add(subject);
      }

    } catch (MessagingException e) {
      System.err.println("MessagineException");
    }

    return out;
  }
  public void generateFileChunks(JspWriter out, HttpServletRequest req) throws IOException {
    long startOffset = 0;

    int chunkSizeToView = 0;

    String referrer = req.getParameter("referrer");
    boolean noLink = false;
    if (referrer == null) {
      noLink = true;
    }

    String filename = req.getParameter("filename");
    if (filename == null) {
      out.print("Invalid input (file name absent)");
      return;
    }

    String namenodeInfoPortStr = req.getParameter("namenodeInfoPort");
    int namenodeInfoPort = -1;
    if (namenodeInfoPortStr != null) namenodeInfoPort = Integer.parseInt(namenodeInfoPortStr);

    String chunkSizeToViewStr = req.getParameter("chunkSizeToView");
    if (chunkSizeToViewStr != null && Integer.parseInt(chunkSizeToViewStr) > 0)
      chunkSizeToView = Integer.parseInt(chunkSizeToViewStr);
    else chunkSizeToView = jspHelper.defaultChunkSizeToView;

    if (!noLink) {
      out.print("<h3>Tail of File: ");
      JspHelper.printPathWithLinks(filename, out, namenodeInfoPort);
      out.print("</h3><hr>");
      out.print("<a href=\"" + referrer + "\">Go Back to File View</a><hr>");
    } else {
      out.print("<h3>" + filename + "</h3>");
    }
    out.print("<b>Chunk size to view (in bytes, up to file's DFS block size): </b>");
    out.print(
        "<input type=\"text\" name=\"chunkSizeToView\" value="
            + chunkSizeToView
            + " size=10 maxlength=10>");
    out.print("&nbsp;&nbsp;<input type=\"submit\" name=\"submit\" value=\"Refresh\"><hr>");
    out.print("<input type=\"hidden\" name=\"filename\" value=\"" + filename + "\">");
    out.print(
        "<input type=\"hidden\" name=\"namenodeInfoPort\" value=\"" + namenodeInfoPort + "\">");
    if (!noLink) out.print("<input type=\"hidden\" name=\"referrer\" value=\"" + referrer + "\">");

    // fetch the block from the datanode that has the last block for this file
    DFSClient dfs = new DFSClient(jspHelper.nameNodeAddr, jspHelper.conf);
    List<LocatedBlock> blocks =
        dfs.namenode.getBlockLocations(filename, 0, Long.MAX_VALUE).getLocatedBlocks();
    if (blocks == null || blocks.size() == 0) {
      out.print("No datanodes contain blocks of file " + filename);
      dfs.close();
      return;
    }
    LocatedBlock lastBlk = blocks.get(blocks.size() - 1);
    long blockSize = lastBlk.getBlock().getNumBytes();
    long blockId = lastBlk.getBlock().getBlockId();
    long genStamp = lastBlk.getBlock().getGenerationStamp();
    DatanodeInfo chosenNode;
    try {
      chosenNode = jspHelper.bestNode(lastBlk);
    } catch (IOException e) {
      out.print(e.toString());
      dfs.close();
      return;
    }
    InetSocketAddress addr = NetUtils.createSocketAddr(chosenNode.getName());
    // view the last chunkSizeToView bytes while Tailing
    if (blockSize >= chunkSizeToView) startOffset = blockSize - chunkSizeToView;
    else startOffset = 0;

    out.print("<textarea cols=\"100\" rows=\"25\" wrap=\"virtual\" style=\"width:100%\" READONLY>");
    jspHelper.streamBlockInAscii(
        addr, blockId, genStamp, blockSize, startOffset, chunkSizeToView, out);
    out.print("</textarea>");
    dfs.close();
  }
Exemple #21
0
  public int run(String[] args) throws Exception {
    // printUsage();
    /*
     * SETUP
     */
    Configuration argConf = getConf();
    Hashtable<String, String> confArg = new Hashtable<String, String>();
    setup(confArg, argConf);
    Date currentTime = new Date();
    Date endDate = new Date(new Long(confArg.get("timestamp_stop")));
    Boolean full_run = confArg.get("intermediate").matches("(?i).*true.*");
    Boolean quick_add = confArg.get("quick_add").matches("(?i).*true.*");
    logger.info("Running GeStore");

    // ZooKeeper setup
    Configuration config = HBaseConfiguration.create();
    zkWatcher = new ZooKeeperWatcher(config, "Testing", new HBaseAdmin(config));
    zkInstance =
        new ZooKeeper(
            ZKConfig.getZKQuorumServersString(config),
            config.getInt("zookeeper.session.timeout", -1),
            zkWatcher);

    if (!confArg.get("task_id").isEmpty()) {
      confArg.put("temp_path", confArg.get("temp_path") + confArg.get("task_id"));
    }

    String lockRequest = confArg.get("file_id");
    if (!confArg.get("run_id").isEmpty())
      lockRequest = lockRequest + "_" + confArg.get("run_id") + "_";
    if (!confArg.get("task_id").isEmpty())
      lockRequest = lockRequest + "_" + confArg.get("task_id") + "_";

    // Get type of movement
    toFrom type_move = checkArgs(confArg);
    if (type_move == toFrom.LOCAL2REMOTE && !confArg.get("format").equals("unknown")) {
      List<String> arguments = new ArrayList<String>();
      arguments.add("-Dinput=" + confArg.get("local_path"));
      arguments.add("-Dtable=" + confArg.get("file_id"));
      arguments.add("-Dtimestamp=" + confArg.get("timestamp_stop"));
      arguments.add("-Dtype=" + confArg.get("format"));
      arguments.add("-Dtarget_dir=" + confArg.get("base_path") + "_" + confArg.get("file_id"));
      arguments.add("-Dtemp_hdfs_path=" + confArg.get("temp_path"));
      arguments.add("-Drun_id=" + confArg.get("run_id"));
      if (!confArg.get("run_id").isEmpty()) arguments.add("-Drun_id=" + confArg.get("run_id"));
      if (!confArg.get("task_id").isEmpty()) arguments.add("-Dtask_id=" + confArg.get("task_id"));
      if (quick_add) arguments.add("-Dquick_add=" + confArg.get("quick_add"));
      String lockName = lock(lockRequest);
      String[] argumentString = arguments.toArray(new String[arguments.size()]);
      adddb.main(argumentString);
      unlock(lockName);
      System.exit(0);
    }

    // Database registration

    dbutil db_util = new dbutil(config);
    db_util.register_database(confArg.get("db_name_files"), true);
    db_util.register_database(confArg.get("db_name_runs"), true);
    db_util.register_database(confArg.get("db_name_updates"), true);
    FileSystem hdfs = FileSystem.get(config);
    FileSystem localFS = FileSystem.getLocal(config);

    // Get source type
    confArg.put("source", getSource(db_util, confArg.get("db_name_files"), confArg.get("file_id")));
    confArg.put(
        "database", isDatabase(db_util, confArg.get("db_name_files"), confArg.get("file_id")));
    if (!confArg.get("source").equals("local")
        && type_move == toFrom.REMOTE2LOCAL
        && !confArg.get("timestamp_stop").equals(Integer.toString(Integer.MAX_VALUE))) {
      confArg.put("timestamp_stop", Long.toString(latestVersion(confArg, db_util)));
    }

    /*
     * Get previous timestamp
     */
    Get run_id_get = new Get(confArg.get("run_id").getBytes());
    Result run_get = db_util.doGet(confArg.get("db_name_runs"), run_id_get);
    KeyValue run_file_prev =
        run_get.getColumnLatest(
            "d".getBytes(), (confArg.get("file_id") + "_db_timestamp").getBytes());
    String last_timestamp = new String("0");
    if (null != run_file_prev && !confArg.get("source").equals("local")) {
      long last_timestamp_real = run_file_prev.getTimestamp();
      Long current_timestamp = new Long(confArg.get("timestamp_real"));
      if ((current_timestamp - last_timestamp_real) > 36000) {
        last_timestamp = new String(run_file_prev.getValue());
        Integer lastTimestamp = new Integer(last_timestamp);
        lastTimestamp += 1;
        last_timestamp = lastTimestamp.toString();
        logger.info("Last timestamp: " + last_timestamp + " End data: " + endDate);
        Date last_run = new Date(run_file_prev.getTimestamp());
        if (last_run.before(endDate) && !full_run) {
          confArg.put("timestamp_start", last_timestamp);
        }
      }
    }

    Integer tse = new Integer(confArg.get("timestamp_stop"));
    Integer tss = new Integer(confArg.get("timestamp_start"));
    if (tss > tse) {
      logger.info("No new version of requested file.");
      return 0;
    }

    /*
     * Generate file
     */

    String lockName = lock(lockRequest);

    Get file_id_get = new Get(confArg.get("file_id").getBytes());
    Result file_get = db_util.doGet(confArg.get("db_name_files"), file_id_get);
    if (!file_get.isEmpty()) {
      boolean found =
          hasFile(
              db_util,
              hdfs,
              confArg.get("db_name_files"),
              confArg.get("file_id"),
              getFullPath(confArg));
      if (confArg.get("source").equals("fullfile")) {
        found = false;
      }
      String filenames_put =
          getFileNames(
              db_util, confArg.get("db_name_files"), confArg.get("file_id"), getFullPath(confArg));
      // Filename not found in file database
      if (!found && type_move == toFrom.REMOTE2LOCAL) {
        if (!confArg.get("source").equals("local")) {
          // Generate intermediate file
          if (getFile(hdfs, confArg, db_util) == null) {
            unlock(lockName);
            return 1;
          }
          // Put generated file into file database
          if (!confArg.get("format").equals("fullfile")) {
            putFileEntry(
                db_util,
                hdfs,
                confArg.get("db_name_files"),
                confArg.get("file_id"),
                confArg.get("full_file_name"),
                confArg.get("source"));
          }
        } else {
          logger.warn("Remote file not found, and cannot be generated! File: " + confArg);
          unlock(lockName);
          return 1;
        }
      }
    } else {
      if (type_move == toFrom.REMOTE2LOCAL) {
        logger.warn("Remote file not found, and cannot be generated.");
        unlock(lockName);
        return 1;
      }
    }

    /*
     * Copy file
     * Update tables
     */

    if (type_move == toFrom.LOCAL2REMOTE) {
      if (!confArg.get("format").equals("fullfile")) {
        putFileEntry(
            db_util,
            hdfs,
            confArg.get("db_name_files"),
            confArg.get("file_id"),
            getFullPath(confArg),
            confArg.get("source"));
      }
      putRunEntry(
          db_util,
          confArg.get("db_name_runs"),
          confArg.get("run_id"),
          confArg.get("file_id"),
          confArg.get("type"),
          confArg.get("timestamp_real"),
          confArg.get("timestamp_stop"),
          getFullPath(confArg),
          confArg.get("delimiter"));
      hdfs.copyFromLocalFile(new Path(confArg.get("local_path")), new Path(getFullPath(confArg)));
    } else if (type_move == toFrom.REMOTE2LOCAL) {
      FileStatus[] files = hdfs.globStatus(new Path(getFullPath(confArg) + "*"));
      putRunEntry(
          db_util,
          confArg.get("db_name_runs"),
          confArg.get("run_id"),
          confArg.get("file_id"),
          confArg.get("type"),
          confArg.get("timestamp_real"),
          confArg.get("timestamp_stop"),
          getFullPath(confArg),
          confArg.get("delimiter"));
      unlock(lockName);
      for (FileStatus file : files) {
        Path cur_file = file.getPath();
        Path cur_local_path =
            new Path(new String(confArg.get("local_path") + confArg.get("file_id")));
        String suffix = getSuffix(getFileName(confArg), cur_file.getName());
        if (suffix.length() > 0) {
          cur_local_path = cur_local_path.suffix(new String("." + suffix));
        }
        if (confArg.get("copy").equals("true")) {
          String crc = hdfs.getFileChecksum(cur_file).toString();
          if (checksumLocalTest(cur_local_path, crc)) {
            continue;
          } else {
            hdfs.copyToLocalFile(cur_file, cur_local_path);
            writeChecksum(cur_local_path, crc);
          }
        } else {
          System.out.println(cur_local_path + "\t" + cur_file);
        }
      }
    }
    unlock(lockName);
    return 0;
  }