/** * Data is batched up based on connection batch size. Column PDataType is read from metadata and * is used to convert column value to correct type before upsert. * * <p>The format is determined by the supplied csvParser. * * @param csvParser CSVParser instance * @throws Exception */ public void upsert(CSVParser csvParser) throws Exception { List<ColumnInfo> columnInfoList = buildColumnInfoList(csvParser); boolean wasAutoCommit = conn.getAutoCommit(); try { conn.setAutoCommit(false); long start = System.currentTimeMillis(); CsvUpsertListener upsertListener = new CsvUpsertListener(conn, conn.getMutateBatchSize()); CsvUpsertExecutor csvUpsertExecutor = CsvUpsertExecutor.create( conn, tableName, columnInfoList, upsertListener, arrayElementSeparator); csvUpsertExecutor.execute(csvParser); csvUpsertExecutor.close(); conn.commit(); double elapsedDuration = ((System.currentTimeMillis() - start) / 1000.0); System.out.println( "CSV Upsert complete. " + upsertListener.getTotalUpsertCount() + " rows upserted"); System.out.println("Time: " + elapsedDuration + " sec(s)\n"); } finally { // release reader resources. if (csvParser != null) { csvParser.close(); } if (wasAutoCommit) { conn.setAutoCommit(true); } } }
/**
 * Listener callback fired with the cumulative upsert count so far.
 *
 * <p>Records the running total, and commits the connection every
 * {@code upsertBatchSize} rows. A progress line is additionally logged when the
 * count is a multiple of 1000 (note: only evaluated on batch boundaries, matching
 * the original behavior).
 *
 * @param upsertCount cumulative number of rows upserted so far
 */
@Override
public void upsertDone(long upsertCount) {
  totalUpserts = upsertCount;
  // Only act on batch boundaries; everything below is per-batch work.
  if (upsertCount % upsertBatchSize != 0) {
    return;
  }
  if (upsertCount % 1000 == 0) {
    LOG.info("Processed upsert #{}", upsertCount);
  }
  try {
    LOG.info("Committing after {} records", upsertCount);
    conn.commit();
  } catch (SQLException e) {
    // Listener interface cannot throw checked exceptions; wrap and propagate.
    throw new RuntimeException(e);
  }
}
/**
 * Runs the SELECT side of an UPSERT SELECT, converting each result row into row
 * mutations and (when auto-commit is on) committing them in batches.
 *
 * <p>Batch size is the smaller of the connection's mutate batch size and the
 * configured max mutation size. The returned MutationState carries whatever rows
 * remain uncommitted; if auto-commit is enabled the caller's return path commits
 * that final partial batch.
 *
 * @param statement the Phoenix statement providing the connection
 * @param tableRef target table of the upsert
 * @param projector projects SELECT results into the target column shape
 * @param iterator source of SELECT results; always closed before returning
 * @param columnIndexes for each projected column, its position in the target table
 * @param pkSlotIndexes PK slot positions corresponding to the projected columns
 * @throws SQLException if a value exceeds the target column's size constraints,
 *     or any underlying JDBC/iterator operation fails
 */
private static MutationState upsertSelect(
    PhoenixStatement statement,
    TableRef tableRef,
    RowProjector projector,
    ResultIterator iterator,
    int[] columnIndexes,
    int[] pkSlotIndexes)
    throws SQLException {
  try {
    PhoenixConnection connection = statement.getConnection();
    ConnectionQueryServices services = connection.getQueryServices();
    int maxSize =
        services
            .getProps()
            .getInt(
                QueryServices.MAX_MUTATION_SIZE_ATTRIB,
                QueryServicesOptions.DEFAULT_MAX_MUTATION_SIZE);
    // Never batch more rows than the configured mutation-size ceiling allows.
    int batchSize = Math.min(connection.getMutateBatchSize(), maxSize);
    boolean isAutoCommit = connection.getAutoCommit();
    byte[][] values = new byte[columnIndexes.length][];
    int rowCount = 0;
    Map<ImmutableBytesPtr, Map<PColumn, byte[]>> mutation =
        Maps.newHashMapWithExpectedSize(batchSize);
    PTable table = tableRef.getTable();
    // Wrap the iterator as a ResultSet so values can be read positionally.
    ResultSet rs = new PhoenixResultSet(iterator, projector, statement);
    // Reused across all rows/columns to avoid per-value allocation.
    ImmutableBytesWritable ptr = new ImmutableBytesWritable();
    while (rs.next()) {
      for (int i = 0; i < values.length; i++) {
        PColumn column = table.getColumns().get(columnIndexes[i]);
        byte[] bytes = rs.getBytes(i + 1);
        ptr.set(bytes == null ? ByteUtil.EMPTY_BYTE_ARRAY : bytes);
        Object value = rs.getObject(i + 1);
        // JDBC reports 0 precision/scale for "not applicable"; map that to null.
        int rsPrecision = rs.getMetaData().getPrecision(i + 1);
        Integer precision = rsPrecision == 0 ? null : rsPrecision;
        int rsScale = rs.getMetaData().getScale(i + 1);
        Integer scale = rsScale == 0 ? null : rsScale;
        // We are guaranteed that the two column will have compatible types,
        // as we checked that before.
        if (!column
            .getDataType()
            .isSizeCompatible(
                ptr,
                value,
                column.getDataType(),
                precision,
                scale,
                column.getMaxLength(),
                column.getScale())) {
          throw new SQLExceptionInfo.Builder(SQLExceptionCode.DATA_EXCEEDS_MAX_CAPACITY)
              .setColumnName(column.getName().getString())
              .setMessage("value=" + column.getDataType().toStringLiteral(ptr, null))
              .build()
              .buildException();
        }
        // Coerce the value in place (via ptr) to the target column's
        // max length / scale / sort order.
        column
            .getDataType()
            .coerceBytes(
                ptr,
                value,
                column.getDataType(),
                precision,
                scale,
                SortOrder.getDefault(),
                column.getMaxLength(),
                column.getScale(),
                column.getSortOrder());
        // Copy out: ptr's backing array is reused on the next iteration.
        values[i] = ByteUtil.copyKeyBytesIfNecessary(ptr);
      }
      setValues(values, pkSlotIndexes, columnIndexes, table, mutation);
      rowCount++;
      // Commit a batch if auto commit is true and we're at our batch size
      if (isAutoCommit && rowCount % batchSize == 0) {
        MutationState state = new MutationState(tableRef, mutation, 0, maxSize, connection);
        connection.getMutationState().join(state);
        connection.commit();
        mutation.clear();
      }
    }
    // If auto commit is true, this last batch will be committed upon return
    // NOTE(review): rowCount / batchSize * batchSize is integer math — presumably
    // the count of rows already committed above, used as the state's size offset.
    return new MutationState(
        tableRef, mutation, rowCount / batchSize * batchSize, maxSize, connection);
  } finally {
    iterator.close();
  }
}