Example #1
0
      /**
       * Copies the input stream into the writer, one record per input line.
       *
       * <p>Every line read from {@code this.stream} is written to
       * {@code this.writer} followed by the configured output record delimiter,
       * after which the writer is told that a split is allowed. Both the reader
       * and the writer are closed before this method returns.
       *
       * <p>An {@link IOException} during the transfer, or while closing the
       * writer, is logged and reported through {@code setError()}; it is never
       * propagated to the caller of {@code run()}.
       */
      public void run() {
        BufferedReader r = null;
        SplittableBufferedWriter w = this.writer;

        char recordDelim = this.options.getOutputRecordDelim();

        try {
          // NOTE(review): no charset is given, so the platform default is used
          // to decode the stream -- confirm this matches the database encoding.
          r = new BufferedReader(new InputStreamReader(this.stream));

          // read/write transfer loop: readLine() strips the line terminator, so
          // the configured record delimiter is appended explicitly.
          String inLine;
          while ((inLine = r.readLine()) != null) {
            w.write(inLine);
            w.write(recordDelim);
            w.allowSplit();
            // Counts chars (plus one for the delimiter), not encoded bytes.
            counters.addBytes(1 + inLine.length());
          }
        } catch (IOException ioe) {
          LOG.error("IOException reading from psql: " + ioe.toString());
          // set the error bit so our caller can see that something went wrong.
          setError();
        } finally {
          if (null != r) {
            try {
              r.close();
            } catch (IOException ioe) {
              // Input side only; everything readable has already been consumed.
              LOG.info("Error closing FIFO stream: " + ioe.toString());
            }
          }

          if (null != w) {
            try {
              w.close();
            } catch (IOException ioe) {
              // Closing the writer is the last chance for buffered output to
              // reach its destination; a failure here means the output may be
              // incomplete, so it must be reported as an error rather than
              // silently treated as success.
              LOG.error("Error closing HDFS stream: " + ioe.toString());
              setError();
            }
          }
        }
      }
Example #2
0
  // TODO(aaron): Refactor this method to be much shorter.
  // CHECKSTYLE:OFF
  /**
   * Imports a table into HDFS by running psql as an external process and
   * streaming the output of a COPY command from its stdout.
   *
   * <p>The COPY command is written to a temporary script file that is passed
   * to psql with {@code -f}. If a username and password are configured, the
   * password is written to a file whose location is exported to psql through
   * the {@code PGPASSFILE} environment variable. Both files are deleted before
   * this method returns. The method blocks until psql has exited and the
   * stdout sink has finished.
   *
   * <p>If the calling thread is interrupted while waiting, the waits for psql
   * and for the stdout sink are resumed, and the thread's interrupt status is
   * restored once all waiting is done.
   *
   * @param context supplies the table name and the import options
   * @throws IOException if an I/O error occurs while setting up the import, if
   *     psql exits with a non-zero status, or if the stdout sink reports a
   *     failure
   * @throws ImportException if the null-string and null-non-string values
   *     differ, or if the database name cannot be determined from the connect
   *     string
   */
  @Override
  public void importTable(com.cloudera.sqoop.manager.ImportJobContext context)
      throws IOException, ImportException {

    String tableName = context.getTableName();
    SqoopOptions options = context.getOptions();

    LOG.info("Beginning psql fast path import");

    if (options.getFileLayout() != SqoopOptions.FileLayout.TextFile) {
      // TODO(aaron): Support SequenceFile-based load-in
      LOG.warn("File import layout " + options.getFileLayout() + " is not supported by");
      LOG.warn("Postgresql direct import; import will proceed as text files.");
    }

    if (!StringUtils.equals(options.getNullStringValue(), options.getNullNonStringValue())) {
      throw new ImportException(
          "Detected different values of --input-string and --input-non-string "
              + "parameters. PostgreSQL direct manager do not support that. Please "
              + "either use the same values or omit the --direct parameter.");
    }

    String commandFilename = null;
    String passwordFilename = null;
    Process p = null;
    AsyncSink sink = null;
    AsyncSink errSink = null;
    PerfCounters counters = new PerfCounters();

    try {
      // Get the COPY TABLE command to issue, write this to a file, and pass
      // it in to psql with -f filename.  Then make sure we delete this file
      // in our finally block.
      String copyCmd = getCopyCommand(tableName);
      commandFilename = writeCopyCommand(copyCmd);

      // Arguments to pass to psql on the command line.
      List<String> args = new ArrayList<String>();

      // Environment to pass to psql.
      List<String> envp = Executor.getCurEnvpStrings();

      // We need to parse the connect string URI to determine the database
      // name and the host and port.
      // NOTE(review): an earlier comment here said that for localhost without
      // a port the host should not be passed, to force a UNIX domain socket.
      // The code below always passes --host; confirm which is intended.
      String connectString = options.getConnectString();
      String databaseName = JdbcUrl.getDatabaseName(connectString);
      String hostname = JdbcUrl.getHostName(connectString);
      int port = JdbcUrl.getPort(connectString);

      if (null == databaseName) {
        throw new ImportException("Could not determine database name");
      }

      LOG.info("Performing import of table " + tableName + " from database " + databaseName);
      args.add(PSQL_CMD); // requires that this is on the path.
      args.add("--tuples-only");
      args.add("--quiet");

      String username = options.getUsername();
      if (username != null) {
        args.add("--username");
        args.add(username);
        String password = options.getPassword();
        if (null != password) {
          passwordFilename = PostgreSQLUtils.writePasswordFile(options.getTempDir(), password);
          // Need to send PGPASSFILE environment variable specifying
          // location of our postgres file.
          envp.add("PGPASSFILE=" + passwordFilename);
        }
      }

      args.add("--host");
      args.add(hostname);

      // A port of -1 means the connect string did not specify one.
      if (port != -1) {
        args.add("--port");
        args.add(Integer.toString(port));
      }

      // databaseName is known to be non-null here; null was rejected above.
      if (databaseName.length() > 0) {
        args.add(databaseName);
      }

      // The COPY command is in a script file.
      args.add("-f");
      args.add(commandFilename);

      // begin the import in an external process.
      LOG.debug("Starting psql with arguments:");
      for (String arg : args) {
        LOG.debug("  " + arg);
      }

      // Once the sink is running, this writer will be closed by AsyncSink.
      SplittableBufferedWriter w =
          DirectImportUtils.createHdfsSink(options.getConf(), options, context);

      // Actually start the psql dump.
      try {
        p = Runtime.getRuntime().exec(args.toArray(new String[0]), envp.toArray(new String[0]));
      } finally {
        if (null == p && null != w) {
          // psql never started, so no sink will ever take ownership of the
          // writer; close it here so it is not leaked.
          try {
            w.close();
          } catch (IOException ioe) {
            LOG.info("Error closing HDFS stream: " + ioe.toString());
          }
        }
      }

      // read from the stdout pipe into the HDFS writer.
      InputStream is = p.getInputStream();
      sink = new PostgresqlAsyncSink(w, options, counters);

      LOG.debug("Starting stream sink");
      counters.startClock();
      sink.processStream(is);
      errSink = new LoggingAsyncSink(LOG);
      errSink.processStream(p.getErrorStream());
    } finally {
      // Set when one of the blocking waits below is interrupted, so that the
      // interrupt can be re-asserted after all waits instead of being lost.
      boolean interrupted = false;

      // block until the process is done.
      LOG.debug("Waiting for process completion");
      int result = 0;
      if (null != p) {
        while (true) {
          try {
            result = p.waitFor();
            break;
          } catch (InterruptedException ie) {
            // interrupted; remember it and loop around.
            interrupted = true;
          }
        }
      }

      // Remove any password file we wrote
      if (null != passwordFilename) {
        if (!new File(passwordFilename).delete()) {
          LOG.error("Could not remove postgresql password file " + passwordFilename);
          LOG.error("You should remove this file to protect your credentials.");
        }
      }

      if (null != commandFilename) {
        // We wrote the COPY command to a tmpfile. Remove it.
        if (!new File(commandFilename).delete()) {
          LOG.info("Could not remove temp file: " + commandFilename);
        }
      }

      // block until the stream sink is done too.
      int streamResult = 0;
      if (null != sink) {
        while (true) {
          try {
            streamResult = sink.join();
            break;
          } catch (InterruptedException ie) {
            // interrupted; remember it and loop around.
            interrupted = true;
          }
        }
      }

      // Attempt to block for stderr stream sink; errors are advisory, so an
      // interrupt here ends the wait instead of retrying.
      if (null != errSink) {
        try {
          if (0 != errSink.join()) {
            LOG.info("Encountered exception reading stderr stream");
          }
        } catch (InterruptedException ie) {
          interrupted = true;
          LOG.info("Thread interrupted waiting for stderr to complete: " + ie.toString());
        }
      }

      // All blocking waits are finished; hand the interrupt back to the caller.
      if (interrupted) {
        Thread.currentThread().interrupt();
      }

      LOG.info("Transfer loop complete.");

      // NOTE(review): these throws are inside a finally block, so they replace
      // any exception already propagating out of the try block above.
      if (0 != result) {
        throw new IOException("psql terminated with status " + Integer.toString(result));
      }

      if (0 != streamResult) {
        throw new IOException("Encountered exception in stream sink");
      }

      counters.stopClock();
      LOG.info("Transferred " + counters.toString());
    }
  }