@Test public void testSkippingHeaderWithText() throws IOException { TableMeta meta = CatalogUtil.newTableMeta("TEXT"); meta.putOption(StorageConstants.TEXT_SKIP_HEADER_LINE, "1"); meta.putOption(StorageConstants.TEXT_DELIMITER, ","); FileFragment fragment = getFileFragment("testSkip.txt"); Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment); scanner.init(); int lines = 0; try { while (true) { Tuple tuple = scanner.next(); if (tuple != null) { assertEquals(17 + lines, tuple.getInt2(2)); lines++; } else break; } } finally { assertEquals(6, lines); scanner.close(); } }
@Test public void testIgnoreTruncatedValueErrorTolerance() throws IOException { TajoConf conf = new TajoConf(); TableMeta meta = CatalogUtil.newTableMeta("JSON"); meta.putOption(StorageUtil.TEXT_ERROR_TOLERANCE_MAXNUM, "1"); FileFragment fragment = getFileFragment("testErrorTolerance3.json"); Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment); scanner.init(); try { Tuple tuple = scanner.next(); assertNull(tuple); } finally { scanner.close(); } }
@Test public void testNoErrorTolerance() throws IOException { TajoConf conf = new TajoConf(); TableMeta meta = CatalogUtil.newTableMeta("JSON"); meta.putOption(StorageUtil.TEXT_ERROR_TOLERANCE_MAXNUM, "0"); FileFragment fragment = getFileFragment("testErrorTolerance2.json"); Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment); scanner.init(); try { scanner.next(); } catch (IOException ioe) { return; } finally { scanner.close(); } fail(); }
@Test public void testIgnoreAllErrors() throws IOException { TajoConf conf = new TajoConf(); TableMeta meta = CatalogUtil.newTableMeta("JSON"); meta.putOption(StorageUtil.TEXT_ERROR_TOLERANCE_MAXNUM, "-1"); FileFragment fragment = getFileFragment("testErrorTolerance1.json"); Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment); scanner.init(); Tuple tuple; int i = 0; while ((tuple = scanner.next()) != null) { assertEquals(baseTuple, tuple); i++; } assertEquals(3, i); scanner.close(); }
/** Insert row values */ private void insertRowValues( QueryContext queryContext, InsertNode insertNode, SubmitQueryResponse.Builder responseBuilder) { try { String nodeUniqName = insertNode.getTableName() == null ? new Path(insertNode.getUri()).getName() : insertNode.getTableName(); String queryId = nodeUniqName + "_" + System.currentTimeMillis(); URI finalOutputUri = insertNode.getUri(); Tablespace space = TablespaceManager.get(finalOutputUri).get(); TableMeta tableMeta = new TableMeta(insertNode.getStorageType(), insertNode.getOptions()); tableMeta.putOption(StorageConstants.INSERT_DIRECTLY, Boolean.TRUE.toString()); FormatProperty formatProperty = space.getFormatProperty(tableMeta); TaskAttemptContext taskAttemptContext; if (formatProperty .directInsertSupported()) { // if this format and storage supports direct insertion taskAttemptContext = new TaskAttemptContext(queryContext, null, null, null, null); taskAttemptContext.setOutputPath(new Path(finalOutputUri)); EvalExprExec evalExprExec = new EvalExprExec(taskAttemptContext, (EvalExprNode) insertNode.getChild()); InsertRowsExec exec = new InsertRowsExec(taskAttemptContext, insertNode, evalExprExec); try { exec.init(); exec.next(); } finally { exec.close(); } } else { URI stagingSpaceUri = space.prepareStagingSpace(context.getConf(), queryId, queryContext, tableMeta); Path stagingDir = new Path(stagingSpaceUri); Path stagingResultDir = new Path(stagingDir, TajoConstants.RESULT_DIR_NAME); taskAttemptContext = new TaskAttemptContext(queryContext, null, null, null, stagingDir); taskAttemptContext.setOutputPath(new Path(stagingResultDir, "part-01-000000")); insertRowsThroughStaging( taskAttemptContext, insertNode, new Path(finalOutputUri), stagingDir, stagingResultDir); } // set insert stats (how many rows and bytes) TableStats stats = new TableStats(); stats.setNumBytes(taskAttemptContext.getResultStats().getNumBytes()); stats.setNumRows(taskAttemptContext.getResultStats().getNumRows()); if (insertNode.hasTargetTable()) { CatalogProtos.UpdateTableStatsProto.Builder builder = CatalogProtos.UpdateTableStatsProto.newBuilder(); builder.setTableName(insertNode.getTableName()); builder.setStats(stats.getProto()); catalog.updateTableStats(builder.build()); TableDesc desc = new TableDesc( insertNode.getTableName(), insertNode.getTargetSchema(), tableMeta, finalOutputUri); responseBuilder.setTableDesc(desc.getProto()); } else { // If INSERT INTO LOCATION // Empty TableDesc List<CatalogProtos.ColumnProto> columns = new ArrayList<CatalogProtos.ColumnProto>(); CatalogProtos.TableDescProto tableDescProto = CatalogProtos.TableDescProto.newBuilder() .setTableName(nodeUniqName) .setMeta( CatalogProtos.TableProto.newBuilder() .setStoreType(BuiltinStorages.TEXT) .build()) .setSchema(CatalogProtos.SchemaProto.newBuilder().addAllFields(columns).build()) .setStats(stats.getProto()) .build(); responseBuilder.setTableDesc(tableDescProto); } // If queryId == NULL_QUERY_ID and MaxRowNum == -1, TajoCli prints only number of inserted // rows. responseBuilder.setMaxRowNum(-1); responseBuilder.setQueryId(QueryIdFactory.NULL_QUERY_ID.getProto()); responseBuilder.setResultType(ResultType.NO_RESULT); responseBuilder.setState(OK); } catch (Throwable t) { throw new RuntimeException(t); } }