@Override
public void setUp() throws IOException, InterruptedException, SQLException {
  Configuration conf = newConf();
  SqoopOptions options = getSqoopOptions(conf);
  manager = new HsqldbManager(options);
  conn = manager.getConnection();
}
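// A possible tearDown counterpart (a sketch, not present in the original):
// it assumes the "conn" and "manager" fields initialized above, and the
// standard java.sql.Connection.close() and ConnManager.close() methods.
//
//   @Override
//   public void tearDown() throws SQLException {
//     if (null != conn) {
//       conn.close();
//     }
//     if (null != manager) {
//       manager.close();
//     }
//   }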
/**
 * Perform the import of data from an HDFS path to a Hive table.
 *
 * @param inputTableName the name of the table as loaded into HDFS.
 * @param outputTableName the name of the table to create in Hive.
 * @param createOnly if true, run the CREATE TABLE statement but not LOAD DATA.
 */
public void importTable(String inputTableName, String outputTableName,
    boolean createOnly) throws IOException {

  if (null == outputTableName) {
    outputTableName = inputTableName;
  }
  LOG.debug("Hive.inputTable: " + inputTableName);
  LOG.debug("Hive.outputTable: " + outputTableName);

  // For testing purposes against our mock hive implementation,
  // if the sysproperty "expected.script" is set, we set the EXPECTED_SCRIPT
  // environment variable for the child hive process. We also disable
  // timestamp comments so that we have deterministic table creation scripts.
  String expectedScript = System.getProperty("expected.script");
  List<String> env = Executor.getCurEnvpStrings();
  boolean debugMode = expectedScript != null;
  if (debugMode) {
    env.add("EXPECTED_SCRIPT=" + expectedScript);
    env.add("TMPDIR=" + options.getTempDir());
  }

  // generate the HQL statements to run.
  // reset the connection as it might have timed out
  connManager.discardConnection(true);
  TableDefWriter tableWriter = new TableDefWriter(
      options, connManager, inputTableName, outputTableName,
      configuration, !debugMode);
  String createTableStr = tableWriter.getCreateTableStmt() + ";\n";
  String loadDataStmtStr = tableWriter.getLoadDataStmt() + ";\n";
  Path finalPath = tableWriter.getFinalPath();

  if (!isGenerateOnly()) {
    removeTempLogs(finalPath);
    LOG.info("Loading uploaded data into Hive");

    String codec = options.getCompressionCodec();
    if (codec != null && (codec.equals(CodecMap.LZOP)
        || codec.equals(CodecMap.getCodecClassName(CodecMap.LZOP)))) {
      try {
        Tool tool = ReflectionUtils.newInstance(
            Class.forName("com.hadoop.compression.lzo.DistributedLzoIndexer")
              .asSubclass(Tool.class), configuration);
        ToolRunner.run(configuration, tool,
            new String[] { finalPath.toString() });
      } catch (Exception ex) {
        LOG.error("Error indexing lzo files", ex);
        throw new IOException("Error indexing lzo files", ex);
      }
    }
  }

  // write them to a script file.
  File scriptFile = getScriptFile(outputTableName);
  try {
    String filename = scriptFile.toString();
    BufferedWriter w = null;
    try {
      FileOutputStream fos = new FileOutputStream(scriptFile);
      w = new BufferedWriter(new OutputStreamWriter(fos));
      w.write(createTableStr, 0, createTableStr.length());
      if (!createOnly) {
        w.write(loadDataStmtStr, 0, loadDataStmtStr.length());
      }
    } catch (IOException ioe) {
      LOG.error("Error writing Hive load-in script: " + ioe.toString());
      ioe.printStackTrace();
      throw ioe;
    } finally {
      if (null != w) {
        try {
          w.close();
        } catch (IOException ioe) {
          LOG.warn("IOException closing stream to Hive script: "
              + ioe.toString());
        }
      }
    }

    if (!isGenerateOnly()) {
      executeScript(filename, env);

      LOG.info("Hive import complete.");

      cleanUp(finalPath);
    }
  } finally {
    if (!isGenerateOnly()) {
      // User isn't interested in saving the DDL. Remove the file.
      if (!scriptFile.delete()) {
        LOG.warn("Could not remove temporary file: " + scriptFile.toString());
        // try to delete the file later.
        scriptFile.deleteOnExit();
      }
    }
  }
}
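// A minimal usage sketch (an assumption, not part of this class): the
// constructor signature and the caller-side names (options, connManager,
// configuration) are illustrative and may differ in the surrounding codebase.
//
//   HiveImport hiveImport =
//       new HiveImport(options, connManager, configuration, false);
//   // Create the Hive table and load the imported HDFS data into it.
//   hiveImport.importTable("employees", "employees_hive", false);
//   // Only run CREATE TABLE; a null output name defaults to the input name.
//   hiveImport.importTable("departments", null, true);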