Ejemplo n.º 1
0
  /**
   * Inserts the given crawler tasks into {@code yamaloo.crawlertask} using JDBC batching.
   *
   * <p>A task is only inserted when no row with the same {@code BatchID} and {@code UrlHash}
   * exists yet (enforced by the {@code WHERE not exists} sub-select). Rows are sent to the
   * database in chunks of 1000 and each chunk is committed separately, so a failure only rolls
   * back the chunk that was in flight; chunks committed earlier stay committed.
   *
   * <p>Auto-commit is switched off for the duration of the call and switched back on before
   * returning, whether the call succeeds or fails.
   *
   * @param list the tasks to insert
   * @throws Throwable if preparing, executing or committing the batch fails; failures raised
   *     while rolling back or restoring auto-commit are attached as suppressed exceptions
   */
  public void insertCrawlerTaskList(Iterable<CrawlerTask> list) throws Throwable {
    final int batchSize = 1000;

    conn.setAutoCommit(false);
    Throwable failure = null;
    // try-with-resources guarantees the statement is closed even when a batch fails.
    try (PreparedStatement statement =
        conn.prepareStatement(
            "INSERT INTO `yamaloo`.`crawlertask`"
                + " (`BatchID`, `UrlHash`, `Url`, `Depth`, `ParentTaskID`, `Status`, `CreateTime`)"
                + " SELECT ?, ?, ?, ?, ?, ?, ? FROM dual"
                + " WHERE not exists"
                + " (select * from `yamaloo`.`crawlertask` where BatchID = ? AND UrlHash = ?); ")) {

      int count = 0;
      for (CrawlerTask task : list) {
        statement.setInt(1, task.getBatchID());
        statement.setString(2, task.getUrlHash());
        statement.setString(3, task.getUrl().toString());
        statement.setInt(4, task.getDepth());
        statement.setInt(5, task.getParentTaskID());
        statement.setString(6, task.getStatus().toString());
        statement.setTimestamp(7, task.getCreateTime());
        // Parameters 8 and 9 feed the duplicate check in the sub-select.
        statement.setInt(8, task.getBatchID());
        statement.setString(9, task.getUrlHash());

        statement.addBatch();
        count++;

        if (count >= batchSize) {
          count = 0;
          statement.executeBatch();
          conn.commit();
        }
      }

      // Flush whatever is left over from the last, partially filled chunk.
      statement.executeBatch();
      conn.commit();
    } catch (Throwable t) {
      failure = t;
      // Discard the uncommitted chunk; otherwise re-enabling auto-commit below would commit it.
      try {
        conn.rollback();
      } catch (Throwable rollbackFailure) {
        t.addSuppressed(rollbackFailure);
      }
      throw t;
    } finally {
      try {
        conn.setAutoCommit(true);
      } catch (Throwable restoreFailure) {
        if (failure == null) {
          throw restoreFailure;
        }
        // Keep the original failure as the primary exception.
        failure.addSuppressed(restoreFailure);
      }
    }
  }
Ejemplo n.º 2
0
  /**
   * Updates the crawl state of the given tasks in {@code yamaloo.crawlertask} using JDBC
   * batching.
   *
   * <p>For every task the columns {@code Status}, {@code CrawlBeginTime}, {@code CrawlEndTime},
   * {@code RetryCount} and {@code ContentType} are overwritten on the row whose
   * {@code CrawlerTaskID} matches. Updates are sent in chunks of 1000 and each chunk is
   * committed separately, so a failure only rolls back the chunk that was in flight; chunks
   * committed earlier stay committed.
   *
   * <p>Auto-commit is switched off for the duration of the call and switched back on before
   * returning, whether the call succeeds or fails.
   *
   * @param list the tasks whose rows should be updated
   * @throws Throwable if preparing, executing or committing the batch fails; failures raised
   *     while rolling back or restoring auto-commit are attached as suppressed exceptions
   */
  public void updateCrawlerTaskList(List<CrawlerTask> list) throws Throwable {
    final int batchSize = 1000;

    conn.setAutoCommit(false);
    Throwable failure = null;
    // try-with-resources guarantees the statement is closed even when a batch fails.
    try (PreparedStatement statement =
        conn.prepareStatement(
            "UPDATE `yamaloo`.`crawlertask`"
                + " SET `Status` = ?,"
                + " `CrawlBeginTime` = ?,"
                + " `CrawlEndTime` = ?,"
                + " `RetryCount` = ?,"
                + " `ContentType` = ?"
                + " WHERE CrawlerTaskID = ?")) {

      int count = 0;
      for (CrawlerTask task : list) {
        statement.setString(1, task.getStatus().toString());
        statement.setTimestamp(2, task.getCrawlBeginTime());
        statement.setTimestamp(3, task.getCrawlEndTime());
        statement.setInt(4, task.getRetryCount());
        statement.setString(5, task.getContentType());
        statement.setInt(6, task.getCrawlerTaskID());

        statement.addBatch();
        count++;

        if (count >= batchSize) {
          count = 0;
          statement.executeBatch();
          conn.commit();
        }
      }

      // Flush whatever is left over from the last, partially filled chunk.
      statement.executeBatch();
      conn.commit();
    } catch (Throwable t) {
      failure = t;
      // Discard the uncommitted chunk; otherwise re-enabling auto-commit below would commit it.
      try {
        conn.rollback();
      } catch (Throwable rollbackFailure) {
        t.addSuppressed(rollbackFailure);
      }
      throw t;
    } finally {
      try {
        conn.setAutoCommit(true);
      } catch (Throwable restoreFailure) {
        if (failure == null) {
          throw restoreFailure;
        }
        // Keep the original failure as the primary exception.
        failure.addSuppressed(restoreFailure);
      }
    }
  }