/**
  * Configures the map side of a job to read its records through {@link DBInputFormat}, driven by
  * caller-supplied SQL.
  *
  * @param job The map-reduce job whose input format and configuration are updated
  * @param inputClass the class object implementing DBWritable, i.e. the Java object that holds the
  *     tuple fields
  * @param inputQuery the query used to select the fields. Example : "SELECT f1, f2, f3 FROM
  *     Mytable ORDER BY f1"
  * @param inputCountQuery the query that returns the number of records in the table. Example :
  *     "SELECT COUNT(f1) FROM Mytable"
  * @see #setInput(Job, Class, String, String, String, String...)
  */
 public static void setInput(
     Job job, Class<? extends DBWritable> inputClass, String inputQuery, String inputCountQuery) {
   // Register the input format first, then record the query settings on the job's configuration.
   job.setInputFormatClass(DBInputFormat.class);

   DBConfiguration inputSettings = new DBConfiguration(job.getConfiguration());
   inputSettings.setInputClass(inputClass);
   inputSettings.setInputQuery(inputQuery);
   inputSettings.setInputCountQuery(inputCountQuery);
 }
Example #2
0
  /**
   * Shared setup for the public {@code setOutput} overloads: selects {@link DBOutputFormat},
   * turns off speculative execution for reducers, and records the output table name.
   *
   * @param job The job to configure
   * @param tableName The table to insert data into
   * @return the DBConfiguration wrapping {@code job}, so callers can add further output settings
   */
  private static DBConfiguration setOutput(JobConf job, String tableName) {
    job.setOutputFormat(DBOutputFormat.class);
    job.setReduceSpeculativeExecution(false);

    DBConfiguration outputSettings = new DBConfiguration(job);
    outputSettings.setOutputTableName(tableName);

    return outputSettings;
  }
Example #3
0
 /**
  * Initializes the reduce-part of the job with the appropriate output settings.
  *
  * <p>When the first entry of {@code fieldNames} is non-null the names themselves are stored;
  * when it is null only the number of entries is stored, via {@link #setOutput(JobConf, String,
  * int)}.
  *
  * @param job The job
  * @param tableName The table to insert data into
  * @param fieldNames The field names in the table.
  * @throws IllegalArgumentException if no field names are supplied
  */
 public static void setOutput(JobConf job, String tableName, String... fieldNames) {
   if (fieldNames.length == 0) {
     throw new IllegalArgumentException("Field names must be greater than 0");
   }

   if (fieldNames[0] == null) {
     // Names are not known; fall back to recording only how many fields there are.
     setOutput(job, tableName, fieldNames.length);
     return;
   }

   DBConfiguration outputSettings = setOutput(job, tableName);
   outputSettings.setOutputFieldNames(fieldNames);
 }
  /**
   * Builds the query used to obtain the total number of rows. A count query stored in the
   * configuration takes precedence; otherwise a {@code SELECT COUNT(*)} over the input table is
   * generated, restricted by the input conditions when any are set. Subclasses can override this
   * for custom behaviour.
   *
   * @return the SQL text of the count query
   */
  protected String getCountQuery() {
    String configuredQuery = dbConf.getInputCountQuery();
    if (configuredQuery != null) {
      return configuredQuery;
    }

    StringBuilder sql = new StringBuilder("SELECT COUNT(*) FROM ").append(tableName);

    boolean hasConditions = conditions != null && conditions.length() > 0;
    if (hasConditions) {
      sql.append(" WHERE ").append(conditions);
    }

    return sql.toString();
  }
  /**
   * {@inheritDoc}
   *
   * <p>Wraps {@code conf} in a DBConfiguration, obtains a connection from it (auto-commit off,
   * serializable isolation), caches the upper-cased database product name, and then reads the
   * input table name, field names and conditions from the configuration.
   */
  public void setConf(Configuration conf) {

    this.dbConf = new DBConfiguration(conf);

    try {
      // The field is assigned before the connection is tuned, so it stays set even if a later
      // call in this block fails.
      connection = dbConf.getConnection();
      connection.setAutoCommit(false);
      connection.setTransactionIsolation(Connection.TRANSACTION_SERIALIZABLE);

      dbProductName = connection.getMetaData().getDatabaseProductName().toUpperCase();
    } catch (Exception ex) {
      // Any failure while connecting is surfaced unchecked, with the original as its cause.
      throw new RuntimeException(ex);
    }

    this.tableName = dbConf.getInputTableName();
    this.fieldNames = dbConf.getInputFieldNames();
    this.conditions = dbConf.getInputConditions();
  }
 /**
  * Configures the map side of a job to read its records through {@link DBInputFormat}, described
  * by a table name, selection conditions, ordering and field list rather than by raw SQL.
  *
  * @param job The map-reduce job whose input format and configuration are updated
  * @param inputClass the class object implementing DBWritable, i.e. the Java object that holds the
  *     tuple fields
  * @param tableName The table to read data from
  * @param conditions The condition which to select data with, eg. '(updated > 20070101 AND length
  *     > 0)'
  * @param orderBy the fieldNames in the orderBy clause.
  * @param fieldNames The field names in the table
  * @see #setInput(Job, Class, String, String)
  */
 public static void setInput(
     Job job,
     Class<? extends DBWritable> inputClass,
     String tableName,
     String conditions,
     String orderBy,
     String... fieldNames) {
   // Register the input format first, then record the table settings on the job's configuration.
   job.setInputFormatClass(DBInputFormat.class);

   DBConfiguration inputSettings = new DBConfiguration(job.getConfiguration());
   inputSettings.setInputClass(inputClass);
   inputSettings.setInputTableName(tableName);
   inputSettings.setInputFieldNames(fieldNames);
   inputSettings.setInputConditions(conditions);
   inputSettings.setInputOrderBy(orderBy);
 }
  /**
   * Configures and runs a MapReduce job that reads rows of the {@code stocks} table through
   * {@link DBInputFormat} and writes them to the {@code stocks_export} table through {@link
   * DBOutputFormat}.
   *
   * @param mysqlJar the JDBC driver jar, handed to {@code JobHelper.addJarForJob}
   * @param output the job's file output path; any existing content at this path is deleted
   *     recursively before the job starts
   * @throws IllegalStateException if the job runs to completion but reports failure
   * @throws Exception if configuring or running the job fails
   */
  public static void runJob(String mysqlJar, String output) throws Exception {
    Configuration conf = new Configuration();

    JobHelper.addJarForJob(conf, mysqlJar);

    // NOTE(review): database credentials are hard-coded in the JDBC URL; consider supplying them
    // from configuration instead of source.
    DBConfiguration.configureDB(
        conf,
        "com.mysql.jdbc.Driver",
        "jdbc:mysql://localhost/sqoop_test" + "?user=hip_sqoop_user&password=password");

    Job job = new Job(conf);
    job.setJarByClass(DBImportExportMapReduce.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(DBInputFormat.class);
    job.setOutputFormatClass(DBOutputFormat.class);

    job.setMapOutputKeyClass(StockRecord.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setOutputKeyClass(StockRecord.class);
    job.setOutputValueClass(NullWritable.class);

    job.getConfiguration().setInt("mapred.map.tasks", 4);
    job.setNumReduceTasks(4);

    DBInputFormat.setInput(
        job, StockRecord.class, "select * from stocks", "SELECT COUNT(id) FROM stocks");

    DBOutputFormat.setOutput(job, "stocks_export", StockRecord.fields);

    Path outputPath = new Path(output);

    FileOutputFormat.setOutputPath(job, outputPath);

    // Clear any previous output so the job does not start against an existing directory.
    outputPath.getFileSystem(conf).delete(outputPath, true);

    // waitForCompletion reports failure through its return value; surface it instead of
    // returning normally from a failed run.
    if (!job.waitForCompletion(true)) {
      throw new IllegalStateException("database import/export job did not complete successfully");
    }
  }
  /**
   * Creates the record reader for one split, choosing the implementation from the database
   * product name: an Oracle-specific reader when the name starts with "ORACLE", a MySQL-specific
   * reader when it starts with "MYSQL", and the generic reader otherwise.
   *
   * @param split the split the reader will serve
   * @param conf the configuration passed through to the reader
   * @return a record reader bound to this instance's connection, conditions, field names and table
   * @throws IOException if constructing the reader raises an SQLException; the SQLException is
   *     attached as the cause
   */
  protected RecordReader<LongWritable, T> createDBRecordReader(
      DBInputSplit split, Configuration conf) throws IOException {

    @SuppressWarnings("unchecked")
    Class<T> inputClass = (Class<T>) (dbConf.getInputClass());
    try {
      // use database product name to determine appropriate record reader.
      if (dbProductName.startsWith("ORACLE")) {
        // use Oracle-specific db reader.
        return new OracleDBRecordReader<T>(
            split, inputClass, conf, connection, getDBConf(), conditions, fieldNames, tableName);
      } else if (dbProductName.startsWith("MYSQL")) {
        // use MySQL-specific db reader.
        return new MySQLDBRecordReader<T>(
            split, inputClass, conf, connection, getDBConf(), conditions, fieldNames, tableName);
      } else {
        // Generic reader.
        return new DBRecordReader<T>(
            split, inputClass, conf, connection, getDBConf(), conditions, fieldNames, tableName);
      }
    } catch (SQLException ex) {
      // Keep the same message as before, but chain the SQLException so its stack trace, SQL state
      // and vendor code are not lost.
      throw new IOException(ex.getMessage(), ex);
    }
  }
 /**
  * Returns the configuration held by this instance's DBConfiguration.
  *
  * @return the result of {@code dbConf.getConf()}
  */
 public Configuration getConf() {
   Configuration current = dbConf.getConf();
   return current;
 }
 // Static initializer: populates URL_KEY and DEFAULTS from DBConfiguration when this class is
 // initialized.
 static {
   // Reuse the preferences URL key defined by DBConfiguration.
   URL_KEY = DBConfiguration.PREFERENCES_URL_KEY;
   // Defaults are whatever DBConfiguration.getDefaults() returns at class-initialization time.
   DEFAULTS = DBConfiguration.getDefaults();
 }
Example #11
0
 /**
  * Initializes the reduce-part of the job with the appropriate output settings, for callers that
  * know only how many fields the table has rather than their names.
  *
  * @param job The job
  * @param tableName The table to insert data into
  * @param fieldCount the number of fields in the table.
  */
 public static void setOutput(JobConf job, String tableName, int fieldCount) {
   // The two-argument overload performs the common setup and hands back the configuration.
   DBConfiguration outputSettings = setOutput(job, tableName);
   outputSettings.setOutputFieldCount(fieldCount);
 }
Example #12
0
  /**
   * Makes sure {@code table} is present. If the existence check raises an SQLException the table
   * is assumed to be missing, and a CREATE TABLE followed by a CREATE INDEX on the key column is
   * attempted. A failure of either statement is logged and not propagated.
   *
   * @param connection the connection used for the check and for the DDL statements
   * @param table the table to check for and, if needed, create
   * @param dbConfiguration supplies the column types for the key and value columns
   * @throws DatabaseException declared by the signature; not thrown directly by the code in this
   *     method
   */
  private static void initTable(
      final Connection connection, final DatabaseTable table, final DBConfiguration dbConfiguration)
      throws DatabaseException {
    try {
      checkIfTableExists(connection, table);
      LOGGER.trace("table " + table + " appears to exist");
      return;
    } catch (SQLException ignored) {
      // assume error was due to table missing; fall through and create it
    }

    final String tableName = table.toString();

    final String createTableSql =
        new StringBuilder()
            .append("CREATE table ")
            .append(tableName)
            .append(" (")
            .append("\n")
            .append("  ")
            .append(KEY_COLUMN)
            .append(" ")
            .append(dbConfiguration.getColumnTypeKey())
            .append("(")
            .append(KEY_COLUMN_LENGTH)
            .append(") NOT NULL PRIMARY KEY,")
            .append("\n")
            .append("  ")
            .append(VALUE_COLUMN)
            .append(" ")
            .append(dbConfiguration.getColumnTypeValue())
            .append(" ")
            .append("\n")
            .append(")")
            .append("\n")
            .toString();
    executeDdlAndLog(
        connection,
        createTableSql,
        "created table " + tableName,
        "error creating new table " + tableName);

    final String indexName = tableName + "_IDX";
    final String createIndexSql =
        new StringBuilder()
            .append("CREATE index ")
            .append(indexName)
            .append(" ON ")
            .append(tableName)
            .append(" (")
            .append(KEY_COLUMN)
            .append(")")
            .toString();
    executeDdlAndLog(
        connection,
        createIndexSql,
        "created index " + indexName,
        "error creating new index " + indexName);
  }

  /** Runs one DDL statement, logging success at debug and any SQLException at error level. */
  private static void executeDdlAndLog(
      final Connection connection,
      final String sql,
      final String successMessage,
      final String failurePrefix) {
    LOGGER.trace("attempting to execute the following sql statement:\n " + sql);

    Statement statement = null;
    try {
      statement = connection.createStatement();
      statement.execute(sql);
      LOGGER.debug(successMessage);
    } catch (SQLException ex) {
      LOGGER.error(failurePrefix + ": " + ex.getMessage());
    } finally {
      close(statement);
    }
  }
Example #13
0
  /**
   * Opens a JDBC connection described by {@code dbConfiguration}.
   *
   * <p>The driver is resolved in two steps: if the configuration carries driver bytes they are
   * loaded through a JarClassLoader and the driver class is instantiated from there; if no driver
   * has been set after that step, the driver class is loaded from the classpath. The connection is
   * then opened with the configured username/password (when present) and switched to auto-commit.
   *
   * @param dbConfiguration supplies the connection string, driver class name, optional driver
   *     bytes, username and password
   * @return an open connection with auto-commit enabled
   * @throws DatabaseException (carrying {@code PwmError.ERROR_DB_UNAVAILABLE}) if the driver cannot
   *     be loaded, the driver does not accept the connection URL, or opening/initializing the
   *     connection fails
   */
  private Connection openDB(final DBConfiguration dbConfiguration) throws DatabaseException {
    final String connectionURL = dbConfiguration.getConnectionString();
    final String jdbcClassName = dbConfiguration.getDriverClassname();

    try {
      final byte[] jdbcDriverBytes = dbConfiguration.getJdbcDriver();
      if (jdbcDriverBytes != null) {
        LOGGER.debug("loading JDBC database driver stored in configuration");
        final JarClassLoader jarClassLoader = new JarClassLoader();
        jarClassLoader.add(new ByteArrayInputStream(jdbcDriverBytes));
        final JclObjectFactory jclObjectFactory = JclObjectFactory.getInstance();

        // Create object of loaded class
        driver = (Driver) jclObjectFactory.create(jarClassLoader, jdbcClassName);

        LOGGER.debug(
            "successfully loaded JDBC database driver '"
                + jdbcClassName
                + "' from application configuration");
      }
    } catch (Throwable e) {
      final String errorMsg =
          "error registering JDBC database driver stored in configuration: " + e.getMessage();
      final ErrorInformation errorInformation =
          new ErrorInformation(PwmError.ERROR_DB_UNAVAILABLE, errorMsg);
      LOGGER.error(errorMsg, e);
      throw new DatabaseException(errorInformation);
    }

    if (driver == null) {
      try {
        LOGGER.debug("loading JDBC database driver from classpath: " + jdbcClassName);
        driver = (Driver) Class.forName(jdbcClassName).newInstance();

        LOGGER.debug("successfully loaded JDBC database driver from classpath: " + jdbcClassName);
      } catch (Throwable e) {
        final String errorMsg =
            e.getClass().getName()
                + " error loading JDBC database driver from classpath: "
                + e.getMessage();
        final ErrorInformation errorInformation =
            new ErrorInformation(PwmError.ERROR_DB_UNAVAILABLE, errorMsg);
        // Log with the throwable, as the other failure paths do, so the stack trace is not lost
        // when only the ErrorInformation travels with the DatabaseException.
        LOGGER.error(errorMsg, e);
        throw new DatabaseException(errorInformation);
      }
    }

    // Declared outside the try so a connection opened before a later failure can be closed.
    Connection connection = null;
    try {
      LOGGER.debug("opening connection to database " + connectionURL);
      final Properties connectionProperties = new Properties();
      if (dbConfiguration.getUsername() != null && !dbConfiguration.getUsername().isEmpty()) {
        connectionProperties.setProperty("user", dbConfiguration.getUsername());
      }
      if (dbConfiguration.getPassword() != null) {
        connectionProperties.setProperty(
            "password", dbConfiguration.getPassword().getStringValue());
      }
      connection = driver.connect(connectionURL, connectionProperties);

      // java.sql.Driver#connect returns null when the driver does not handle the given URL;
      // report that explicitly rather than failing later with a NullPointerException.
      if (connection == null) {
        throw new IllegalStateException(
            "JDBC driver '" + jdbcClassName + "' does not accept the configured connection URL");
      }

      final Map<PwmAboutProperty, String> debugProps = getConnectionDebugProperties(connection);
      LOGGER.debug(
          "successfully opened connection to database "
              + connectionURL
              + ", properties: "
              + JsonUtil.serializeMap(debugProps));

      connection.setAutoCommit(true);
      return connection;
    } catch (Throwable e) {
      // The connection may already be open if a later step failed; release it so it is not leaked.
      if (connection != null) {
        try {
          connection.close();
        } catch (Exception closeError) {
          LOGGER.debug(
              "error closing partially opened database connection: " + closeError.getMessage());
        }
      }

      final String errorMsg =
          "error connecting to database: " + Helper.readHostileExceptionMessage(e);
      final ErrorInformation errorInformation =
          new ErrorInformation(PwmError.ERROR_DB_UNAVAILABLE, errorMsg);
      if (e instanceof IOException) {
        LOGGER.error(errorInformation);
      } else {
        LOGGER.error(errorMsg, e);
      }
      throw new DatabaseException(errorInformation);
    }
  }