/**
 * Reads {@code testSkip.txt} as comma-delimited TEXT with
 * {@code TEXT_SKIP_HEADER_LINE} set to {@code "1"} and checks that exactly six tuples are
 * returned, whose column at index 2 counts up from 17.
 *
 * @throws IOException if the scanner fails to initialise, read or close
 */
@Test
public void testSkippingHeaderWithText() throws IOException {
  TableMeta meta = CatalogUtil.newTableMeta("TEXT");
  meta.putOption(StorageConstants.TEXT_SKIP_HEADER_LINE, "1");
  meta.putOption(StorageConstants.TEXT_DELIMITER, ",");
  FileFragment fragment = getFileFragment("testSkip.txt");
  Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment);
  scanner.init();

  int lines = 0;
  try {
    Tuple tuple;
    while ((tuple = scanner.next()) != null) {
      // The n-th returned tuple (0-based) must carry 17 + n in column 2.
      assertEquals(17 + lines, tuple.getInt2(2));
      lines++;
    }
  } finally {
    // Only cleanup belongs here: asserting inside finally would skip close() on failure
    // and could hide an exception raised by the loop above.
    scanner.close();
  }

  assertEquals(6, lines);
}
/**
 * Scans {@code testErrorTolerance3.json} as JSON with
 * {@code TEXT_ERROR_TOLERANCE_MAXNUM} set to {@code "1"} and expects the very first
 * {@code next()} call to return {@code null}.
 *
 * @throws IOException if the scanner fails to initialise, read or close
 */
@Test
public void testIgnoreTruncatedValueErrorTolerance() throws IOException {
  TajoConf conf = new TajoConf();
  TableMeta jsonMeta = CatalogUtil.newTableMeta("JSON");
  jsonMeta.putOption(StorageUtil.TEXT_ERROR_TOLERANCE_MAXNUM, "1");

  FileFragment truncatedInput = getFileFragment("testErrorTolerance3.json");
  Scanner jsonScanner =
      TablespaceManager.getLocalFs().getScanner(jsonMeta, schema, truncatedInput);
  jsonScanner.init();

  try {
    assertNull(jsonScanner.next());
  } finally {
    jsonScanner.close();
  }
}
/**
 * Scans {@code testErrorTolerance2.json} as JSON with
 * {@code TEXT_ERROR_TOLERANCE_MAXNUM} set to {@code "0"} and expects the first
 * {@code next()} call to raise an {@link IOException}; the test fails if it does not.
 *
 * @throws IOException if the scanner fails to initialise or close
 */
@Test
public void testNoErrorTolerance() throws IOException {
  TajoConf conf = new TajoConf();
  TableMeta jsonMeta = CatalogUtil.newTableMeta("JSON");
  jsonMeta.putOption(StorageUtil.TEXT_ERROR_TOLERANCE_MAXNUM, "0");

  FileFragment brokenInput = getFileFragment("testErrorTolerance2.json");
  Scanner jsonScanner = TablespaceManager.getLocalFs().getScanner(jsonMeta, schema, brokenInput);
  jsonScanner.init();

  boolean readFailed = false;
  try {
    jsonScanner.next();
  } catch (IOException expected) {
    // This is the outcome the test is looking for.
    readFailed = true;
  } finally {
    jsonScanner.close();
  }

  if (!readFailed) {
    fail();
  }
}
/**
 * Scans {@code testErrorTolerance1.json} as JSON with
 * {@code TEXT_ERROR_TOLERANCE_MAXNUM} set to {@code "-1"} and checks that exactly three
 * tuples are returned, each equal to {@code baseTuple}.
 *
 * @throws IOException if the scanner fails to initialise, read or close
 */
@Test
public void testIgnoreAllErrors() throws IOException {
  // NOTE(review): conf is never read in this test; kept in case constructing TajoConf has
  // side effects the test relies on. Confirm before removing.
  TajoConf conf = new TajoConf();
  TableMeta meta = CatalogUtil.newTableMeta("JSON");
  meta.putOption(StorageUtil.TEXT_ERROR_TOLERANCE_MAXNUM, "-1");
  FileFragment fragment = getFileFragment("testErrorTolerance1.json");
  Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment);
  scanner.init();

  int i = 0;
  try {
    Tuple tuple;
    while ((tuple = scanner.next()) != null) {
      assertEquals(baseTuple, tuple);
      i++;
    }
  } finally {
    // Close even when an assertion or next() throws, matching the sibling tests.
    scanner.close();
  }

  assertEquals(3, i);
}
private boolean isClonedTable(String orignalTable, String newTable) throws Exception { assertTableExists(newTable); TableDesc origTableDesc = client.getTableDesc(orignalTable); TableDesc newTableDesc = client.getTableDesc(newTable); if (isClonedSchema(origTableDesc.getSchema(), newTableDesc.getSchema()) == false) { fail("Schema of input tables do not match"); return false; } // Check partition information PartitionMethodDesc origPartMethod = origTableDesc.getPartitionMethod(); PartitionMethodDesc newPartMethod = newTableDesc.getPartitionMethod(); if (origPartMethod != null) { if (newPartMethod == null) { fail("New table does not have partition info"); return false; } if (isClonedSchema(origPartMethod.getExpressionSchema(), newPartMethod.getExpressionSchema()) == false) { fail("Partition columns of input tables do not match"); return false; } if (origPartMethod.getPartitionType().equals(newPartMethod.getPartitionType()) == false) { fail("Partition type of input tables do not match"); return false; } } // Check external flag if (origTableDesc.isExternal() != newTableDesc.isExternal()) { fail("External table flag on input tables not equal"); return false; } if (origTableDesc.getMeta() != null) { TableMeta origMeta = origTableDesc.getMeta(); TableMeta newMeta = newTableDesc.getMeta(); if (origMeta.getDataFormat().equals(newMeta.getDataFormat()) == false) { fail("Store type of input tables not equal"); return false; } KeyValueSet origOptions = origMeta.getPropertySet(); KeyValueSet newOptions = newMeta.getPropertySet(); if (origOptions.equals(newOptions) == false) { fail("Meta options of input tables not equal"); return false; } } return true; }
/** Insert row values */ private void insertRowValues( QueryContext queryContext, InsertNode insertNode, SubmitQueryResponse.Builder responseBuilder) { try { String nodeUniqName = insertNode.getTableName() == null ? new Path(insertNode.getUri()).getName() : insertNode.getTableName(); String queryId = nodeUniqName + "_" + System.currentTimeMillis(); URI finalOutputUri = insertNode.getUri(); Tablespace space = TablespaceManager.get(finalOutputUri).get(); TableMeta tableMeta = new TableMeta(insertNode.getStorageType(), insertNode.getOptions()); tableMeta.putOption(StorageConstants.INSERT_DIRECTLY, Boolean.TRUE.toString()); FormatProperty formatProperty = space.getFormatProperty(tableMeta); TaskAttemptContext taskAttemptContext; if (formatProperty .directInsertSupported()) { // if this format and storage supports direct insertion taskAttemptContext = new TaskAttemptContext(queryContext, null, null, null, null); taskAttemptContext.setOutputPath(new Path(finalOutputUri)); EvalExprExec evalExprExec = new EvalExprExec(taskAttemptContext, (EvalExprNode) insertNode.getChild()); InsertRowsExec exec = new InsertRowsExec(taskAttemptContext, insertNode, evalExprExec); try { exec.init(); exec.next(); } finally { exec.close(); } } else { URI stagingSpaceUri = space.prepareStagingSpace(context.getConf(), queryId, queryContext, tableMeta); Path stagingDir = new Path(stagingSpaceUri); Path stagingResultDir = new Path(stagingDir, TajoConstants.RESULT_DIR_NAME); taskAttemptContext = new TaskAttemptContext(queryContext, null, null, null, stagingDir); taskAttemptContext.setOutputPath(new Path(stagingResultDir, "part-01-000000")); insertRowsThroughStaging( taskAttemptContext, insertNode, new Path(finalOutputUri), stagingDir, stagingResultDir); } // set insert stats (how many rows and bytes) TableStats stats = new TableStats(); stats.setNumBytes(taskAttemptContext.getResultStats().getNumBytes()); stats.setNumRows(taskAttemptContext.getResultStats().getNumRows()); if 
(insertNode.hasTargetTable()) { CatalogProtos.UpdateTableStatsProto.Builder builder = CatalogProtos.UpdateTableStatsProto.newBuilder(); builder.setTableName(insertNode.getTableName()); builder.setStats(stats.getProto()); catalog.updateTableStats(builder.build()); TableDesc desc = new TableDesc( insertNode.getTableName(), insertNode.getTargetSchema(), tableMeta, finalOutputUri); responseBuilder.setTableDesc(desc.getProto()); } else { // If INSERT INTO LOCATION // Empty TableDesc List<CatalogProtos.ColumnProto> columns = new ArrayList<CatalogProtos.ColumnProto>(); CatalogProtos.TableDescProto tableDescProto = CatalogProtos.TableDescProto.newBuilder() .setTableName(nodeUniqName) .setMeta( CatalogProtos.TableProto.newBuilder() .setStoreType(BuiltinStorages.TEXT) .build()) .setSchema(CatalogProtos.SchemaProto.newBuilder().addAllFields(columns).build()) .setStats(stats.getProto()) .build(); responseBuilder.setTableDesc(tableDescProto); } // If queryId == NULL_QUERY_ID and MaxRowNum == -1, TajoCli prints only number of inserted // rows. responseBuilder.setMaxRowNum(-1); responseBuilder.setQueryId(QueryIdFactory.NULL_QUERY_ID.getProto()); responseBuilder.setResultType(ResultType.NO_RESULT); responseBuilder.setState(OK); } catch (Throwable t) { throw new RuntimeException(t); } }
/**
 * Builds a {@code ColumnMapping} for a four-column TEXT schema with the HBase column spec
 * {@code ":key,col2:key:,col2:value:#b,col3:"} and checks the derived metadata: two column
 * families ({@code col2}, {@code col3}); only index 2 flagged binary; only index 0 flagged as
 * row key; no mapped qualifier name for any column; only index 1 flagged as column key; only
 * index 2 flagged as column value.
 *
 * @throws Exception propagated from the column-mapping construction
 */
@Test
public void testColumnKeyValueMapping() throws Exception {
  KeyValueSet options = new KeyValueSet();
  options.set(HBaseStorageConstants.META_TABLE_KEY, "test");
  options.set(HBaseStorageConstants.META_COLUMNS_KEY, ":key,col2:key:,col2:value:#b,col3:");

  Schema tableSchema = new Schema();
  for (String name : new String[] {"c1", "c2", "c3", "c4"}) {
    tableSchema.addColumn(name, Type.TEXT);
  }

  TableMeta tableMeta = new TableMeta("HBASE", options);
  ColumnMapping columnMapping = new ColumnMapping(tableSchema, tableMeta.getPropertySet());

  List<String> families = columnMapping.getColumnFamilyNames();
  assertEquals(2, families.size());
  assertEquals("col2", families.get(0));
  assertEquals("col3", families.get(1));

  // Only the third column (index 2) is declared binary via "#b".
  for (int idx = 0; idx < columnMapping.getIsBinaryColumns().length; idx++) {
    assertTrue(columnMapping.getIsBinaryColumns()[idx] == (idx == 2));
  }

  // Only the first column (index 0) maps to the row key.
  for (int idx = 0; idx < columnMapping.getIsRowKeyMappings().length; idx++) {
    assertTrue(columnMapping.getIsRowKeyMappings()[idx] == (idx == 0));
  }

  // No column is expected to have a mapped name in slot [1].
  String[] expectedColumnNames = new String[4];
  for (int idx = 0; idx < tableSchema.size(); idx++) {
    String columnName = null;
    if (columnMapping.getMappingColumns()[idx][1] != null) {
      columnName = new String(columnMapping.getMappingColumns()[idx][1]);
    }
    assertEquals(expectedColumnNames[idx], columnName);
  }

  // Only the second column (index 1) is a column-key mapping.
  for (int idx = 0; idx < tableSchema.size(); idx++) {
    assertTrue(columnMapping.getIsColumnKeys()[idx] == (idx == 1));
  }

  // Only the third column (index 2) is a column-value mapping.
  for (int idx = 0; idx < tableSchema.size(); idx++) {
    assertTrue(columnMapping.getIsColumnValues()[idx] == (idx == 2));
  }
}