示例#1
0
文件: RPC.java 项目: baeeq/hadoop-20
    /**
     * Forwards a proxied method call to the remote side through {@code client.call} and returns
     * the unwrapped result.
     *
     * <p>When the call fails with a {@link ConnectException}, {@link NoRouteToHostException},
     * {@link PortUnreachableException} or {@link UnknownHostException}, the
     * {@code needCheckDnsUpdate} flag is raised before the exception is rethrown. Every other
     * exception (including {@code RemoteException}) propagates without touching the flag.
     *
     * @param proxy the proxy instance the method was invoked on (not used here)
     * @param method the interface method being invoked
     * @param args the arguments of the invocation
     * @return the value carried by the {@code ObjectWritable} response
     * @throws Throwable whatever the underlying call throws
     */
    public Object invoke(Object proxy, Method method, Object[] args) throws Throwable {
      final boolean debugEnabled = LOG.isDebugEnabled();
      // The clock is only read when the elapsed time is actually going to be logged.
      final long begunAt = debugEnabled ? System.currentTimeMillis() : 0;

      final ObjectWritable response;
      try {
        Invocation invocation = new Invocation(method, args);
        response =
            (ObjectWritable) client.call(invocation, getAddress(), protocol, ticket, rpcTimeout);
      } catch (IOException failure) {
        // Only connectivity-type failures request a DNS re-check; the exception itself is
        // always rethrown unchanged.
        boolean connectivityProblem =
            failure instanceof ConnectException
                || failure instanceof NoRouteToHostException
                || failure instanceof PortUnreachableException
                || failure instanceof UnknownHostException;
        if (connectivityProblem) {
          needCheckDnsUpdate = true;
        }
        throw failure;
      }

      if (debugEnabled) {
        long elapsedMillis = System.currentTimeMillis() - begunAt;
        LOG.debug("Call: " + method.getName() + " " + elapsedMillis);
      }
      return response.get();
    }
示例#2
0
  /**
   * Injects the URLs found under {@code urlDir} into the crawl db at {@code crawlDb}.
   *
   * <p>Two jobs are run in sequence: the first converts the text input into
   * {@code <url,CrawlDatum>} entries written to a randomly named temporary directory, the second
   * merges those entries into the existing crawl db, which is then installed. The temporary
   * directory is deleted afterwards. If any step fails, a best-effort attempt is made to delete
   * the temporary directory before the failure propagates, so failed runs do not leave
   * {@code inject-temp-*} directories behind.
   *
   * @param crawlDb path of the crawl db to merge the injected URLs into
   * @param urlDir path of the directory holding the URL input files
   * @throws IOException if a job fails, or if the temporary directory cannot be removed after a
   *     successful run
   */
  public void inject(Path crawlDb, Path urlDir) throws IOException {
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    long start = System.currentTimeMillis();
    if (LOG.isInfoEnabled()) {
      LOG.info("Injector: starting at " + sdf.format(start));
      LOG.info("Injector: crawlDb: " + crawlDb);
      LOG.info("Injector: urlDir: " + urlDir);
    }

    Path tempDir =
        new Path(
            getConf().get("mapred.temp.dir", ".")
                + "/inject-temp-"
                + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    boolean jobsSucceeded = false;
    try {
      // map text input file to a <url,CrawlDatum> file
      if (LOG.isInfoEnabled()) {
        LOG.info("Injector: Converting injected urls to crawl db entries.");
      }
      JobConf sortJob = new NutchJob(getConf());
      sortJob.setJobName("inject " + urlDir);
      FileInputFormat.addInputPath(sortJob, urlDir);
      sortJob.setMapperClass(InjectMapper.class);

      FileOutputFormat.setOutputPath(sortJob, tempDir);
      sortJob.setOutputFormat(SequenceFileOutputFormat.class);
      sortJob.setOutputKeyClass(Text.class);
      sortJob.setOutputValueClass(CrawlDatum.class);
      sortJob.setLong("injector.current.time", System.currentTimeMillis());
      RunningJob mapJob = JobClient.runJob(sortJob);

      long urlsInjected = mapJob.getCounters().findCounter("injector", "urls_injected").getValue();
      long urlsFiltered = mapJob.getCounters().findCounter("injector", "urls_filtered").getValue();
      LOG.info("Injector: total number of urls rejected by filters: " + urlsFiltered);
      LOG.info(
          "Injector: total number of urls injected after normalization and filtering: "
              + urlsInjected);

      // merge with existing crawl db
      if (LOG.isInfoEnabled()) {
        LOG.info("Injector: Merging injected urls into crawl db.");
      }
      JobConf mergeJob = CrawlDb.createJob(getConf(), crawlDb);
      FileInputFormat.addInputPath(mergeJob, tempDir);
      mergeJob.setReducerClass(InjectReducer.class);
      JobClient.runJob(mergeJob);
      CrawlDb.install(mergeJob, crawlDb);
      jobsSucceeded = true;
    } finally {
      if (!jobsSucceeded) {
        // Failure path: remove the temp dir on a best-effort basis. A cleanup problem is only
        // logged so that it cannot replace the exception that is already propagating.
        try {
          FileSystem.get(getConf()).delete(tempDir, true);
        } catch (IOException cleanupFailure) {
          LOG.warn("Injector: could not remove temp dir " + tempDir + ": " + cleanupFailure);
        }
      }
    }

    // clean up (success path: a failure to delete is still reported to the caller)
    FileSystem fs = FileSystem.get(getConf());
    fs.delete(tempDir, true);

    long end = System.currentTimeMillis();
    LOG.info(
        "Injector: finished at "
            + sdf.format(end)
            + ", elapsed: "
            + TimingUtil.elapsedTime(start, end));
  }
示例#3
0
文件: RPC.java 项目: baeeq/hadoop-20
    /**
     * Serves a single RPC: looks up the requested method on {@code protocol} by reflection,
     * invokes it on {@code instance}, records queue and processing time metrics, and wraps the
     * result in an {@code ObjectWritable}.
     *
     * @param protocol the protocol class on which the method is looked up
     * @param param the request; it is cast to {@code Invocation}
     * @param receivedTime value subtracted from the current wall-clock milliseconds to obtain
     *     the queue time
     * @return the invocation result together with the method's declared return type
     * @throws IOException the target's own {@code IOException} when the invoked method threw
     *     one; otherwise a new {@code IOException} carrying the original throwable's
     *     {@code toString()} text and stack trace
     */
    public Writable call(Class<?> protocol, Writable param, long receivedTime) throws IOException {
      try {
        final Invocation invocation = (Invocation) param;
        if (verbose) {
          log("Call: " + invocation);
        }

        final String methodName = invocation.getMethodName();
        final Method target = protocol.getMethod(methodName, invocation.getParameterClasses());
        target.setAccessible(true);

        // Queue time is in wall-clock milliseconds; processing time is measured with the
        // nanosecond timer and reported in microseconds.
        final int qTime = (int) (System.currentTimeMillis() - receivedTime);
        final long invokeStartNanos = System.nanoTime();
        final Object result = target.invoke(instance, invocation.getParameters());
        final long processingMicroTime = (System.nanoTime() - invokeStartNanos) / 1000;

        if (LOG.isDebugEnabled()) {
          LOG.debug(
              "Served: "
                  + methodName
                  + " queueTime (millisec)= "
                  + qTime
                  + " procesingTime (microsec)= "
                  + processingMicroTime);
        }
        rpcMetrics.rpcQueueTime.inc(qTime);
        rpcMetrics.rpcProcessingTime.inc(processingMicroTime);

        // Per-method rate: created on first use, looked up in the registry afterwards.
        MetricsTimeVaryingRate perMethodRate =
            (MetricsTimeVaryingRate) rpcMetrics.registry.get(methodName);
        if (perMethodRate == null) {
          try {
            perMethodRate = new MetricsTimeVaryingRate(methodName, rpcMetrics.registry);
          } catch (IllegalArgumentException alreadyRegistered) {
            // the metrics has been registered; re-fetch the handle
            LOG.debug("Error register " + methodName, alreadyRegistered);
            perMethodRate = (MetricsTimeVaryingRate) rpcMetrics.registry.get(methodName);
          }
        }
        // record call time in microseconds
        perMethodRate.inc(processingMicroTime);

        if (verbose) {
          log("Return: " + result);
        }

        return new ObjectWritable(target.getReturnType(), result);

      } catch (InvocationTargetException wrapped) {
        final Throwable cause = wrapped.getTargetException();
        if (cause instanceof IOException) {
          throw (IOException) cause;
        }
        // Non-IO failures of the invoked method are converted, keeping the text and trace.
        final IOException converted = new IOException(cause.toString());
        converted.setStackTrace(cause.getStackTrace());
        throw converted;
      } catch (Throwable unexpected) {
        if (!(unexpected instanceof IOException)) {
          LOG.error("Unexpected throwable object ", unexpected);
        }
        final IOException converted = new IOException(unexpected.toString());
        converted.setStackTrace(unexpected.getStackTrace());
        throw converted;
      }
    }
示例#4
0
文件: RPC.java 项目: baeeq/hadoop-20
  /**
   * Get a proxy connection to a remote server, retrying once per second while the server
   * refuses the connection or the connection attempt times out.
   *
   * @param protocol protocol class
   * @param clientVersion client version
   * @param addr remote address
   * @param conf configuration to use
   * @param timeout time in milliseconds before giving up
   * @param rpcTimeout timeout for each RPC
   * @return the proxy
   * @throws IOException the last {@link ConnectException} or {@link SocketTimeoutException}
   *     once {@code timeout} has elapsed, or any other {@code IOException} raised while
   *     obtaining the proxy (for example if the far end threw a RemoteException)
   * @throws RuntimeException if the login fails; the {@link LoginException} is attached as the
   *     cause
   */
  static <T extends VersionedProtocol> ProtocolProxy<T> waitForProtocolProxy(
      Class<T> protocol,
      long clientVersion,
      InetSocketAddress addr,
      Configuration conf,
      long timeout,
      int rpcTimeout)
      throws IOException {
    long startTime = System.currentTimeMillis();
    UserGroupInformation ugi = null;
    try {
      ugi = UserGroupInformation.login(conf);
    } catch (LoginException le) {
      // Keep the original failure as the cause so the reason for the login error is not lost.
      throw new RuntimeException("Couldn't login!", le);
    }
    IOException ioe;
    while (true) {
      try {
        return getProtocolProxy(
            protocol,
            clientVersion,
            addr,
            ugi,
            conf,
            NetUtils.getDefaultSocketFactory(conf),
            rpcTimeout);
      } catch (ConnectException se) { // namenode has not been started
        LOG.info("Server at " + addr + " not available yet, Zzzzz...");
        ioe = se;
      } catch (SocketTimeoutException te) { // namenode is busy
        LOG.info("Problem connecting to server: " + addr);
        ioe = te;
      }
      // check if timed out
      if (System.currentTimeMillis() - timeout >= startTime) {
        throw ioe;
      }

      // wait for retry
      try {
        Thread.sleep(1000);
      } catch (InterruptedException ie) {
        // Deliberately ignored: an interrupt only cuts the pause short, the loop keeps
        // retrying until the timeout check above gives up.
      }
    }
  }
示例#5
0
文件: RPC.java 项目: baeeq/hadoop-20
 /**
  * Returns the address to use for the next call, re-resolving it first when a DNS re-check has
  * been requested.
  *
  * <p>The re-resolution only happens when {@code needCheckDnsUpdate} is set, the current
  * address and its host name are non-null, and more than {@code MIN_DNS_CHECK_INTERVAL_MSEC}
  * milliseconds have passed since {@code timeLastDnsCheck}. The request flag is cleared on
  * every call that returns, whether or not a re-resolution took place.
  *
  * @return the (possibly refreshed) address
  */
 private synchronized InetSocketAddress getAddress() {
   if (needCheckDnsUpdate && address != null && address.getHostName() != null) {
     long sinceLastCheck = System.currentTimeMillis() - this.timeLastDnsCheck;
     if (sinceLastCheck > MIN_DNS_CHECK_INTERVAL_MSEC) {
       try {
         InetSocketAddress resolved = NetUtils.resolveAddress(address);
         if (resolved != null) {
           LOG.info("DNS change: " + resolved);
           address = resolved;
         }
       } finally {
         // The check time is recorded even if the resolution attempt threw.
         this.timeLastDnsCheck = System.currentTimeMillis();
       }
     }
   }
   needCheckDnsUpdate = false;
   return address;
 }
示例#6
0
 /**
  * Initializes this object from the job configuration: keeps a reference to the job, builds
  * the URL normalizers (inject scope), URL filters and scoring filters, and reads the fetch
  * interval, injected score and current time settings.
  *
  * @param job the job configuration to read from
  */
 public void configure(JobConf job) {
   this.jobConf = job;

   // Helpers built from the job configuration.
   urlNormalizers = new URLNormalizers(job, URLNormalizers.SCOPE_INJECT);

   // "db.fetch.interval.default" falls back to 2592000 when unset.
   interval = job.getInt("db.fetch.interval.default", 2592000);

   filters = new URLFilters(job);
   scfilters = new ScoringFilters(job);

   // "db.score.injected" falls back to 1.0 when unset.
   scoreInjected = job.getFloat("db.score.injected", 1.0f);

   // Falls back to the local clock when the job did not set "injector.current.time".
   curTime = job.getLong("injector.current.time", System.currentTimeMillis());
 }