Example #1
0
  /**
   * Scans {@code testSkip.txt} as comma-delimited TEXT with {@code TEXT_SKIP_HEADER_LINE} set to
   * {@code "1"} and checks the tuples that come back.
   *
   * <p>The n-th returned tuple (counting from zero) must hold {@code 17 + n} in the field at index
   * 2, and exactly six tuples must be returned before the scanner reports end of input.
   *
   * @throws IOException if the scanner fails while reading or closing
   */
  @Test
  public void testSkippingHeaderWithText() throws IOException {
    TableMeta meta = CatalogUtil.newTableMeta("TEXT");
    meta.putOption(StorageConstants.TEXT_SKIP_HEADER_LINE, "1");
    meta.putOption(StorageConstants.TEXT_DELIMITER, ",");
    FileFragment fragment = getFileFragment("testSkip.txt");
    Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment);

    scanner.init();

    int lines = 0;

    try {
      Tuple tuple;
      while ((tuple = scanner.next()) != null) {
        assertEquals(17 + lines, tuple.getInt2(2));
        lines++;
      }
      // Asserted inside the try, not in finally: a failing assertion in finally would skip
      // scanner.close() and would also replace any exception thrown by the loop above.
      assertEquals(6, lines);
    } finally {
      scanner.close();
    }
  }
Example #2
0
  /**
   * Scans {@code testErrorTolerance3.json} as JSON with {@code TEXT_ERROR_TOLERANCE_MAXNUM} set to
   * {@code "1"} and expects the first call to {@code next()} to return {@code null} rather than
   * throw.
   *
   * @throws IOException if the scanner fails while reading or closing
   */
  @Test
  public void testIgnoreTruncatedValueErrorTolerance() throws IOException {
    TableMeta meta = CatalogUtil.newTableMeta("JSON");
    meta.putOption(StorageUtil.TEXT_ERROR_TOLERANCE_MAXNUM, "1");
    FileFragment fragment = getFileFragment("testErrorTolerance3.json");
    Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment);
    scanner.init();

    try {
      Tuple tuple = scanner.next();
      assertNull(tuple);
    } finally {
      scanner.close();
    }
  }
Example #3
0
  /**
   * Scans {@code testErrorTolerance2.json} as JSON with {@code TEXT_ERROR_TOLERANCE_MAXNUM} set to
   * {@code "0"} and expects the first call to {@code next()} to throw an {@link IOException}.
   *
   * <p>The test passes when the exception is thrown and fails when {@code next()} returns
   * normally.
   *
   * @throws IOException if closing the scanner fails
   */
  @Test
  public void testNoErrorTolerance() throws IOException {
    TableMeta meta = CatalogUtil.newTableMeta("JSON");
    meta.putOption(StorageUtil.TEXT_ERROR_TOLERANCE_MAXNUM, "0");
    FileFragment fragment = getFileFragment("testErrorTolerance2.json");
    Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment);
    scanner.init();

    try {
      scanner.next();
    } catch (IOException expected) {
      // This is the outcome the test is looking for; the scanner is still closed by finally.
      return;
    } finally {
      scanner.close();
    }
    fail("scanner.next() was expected to throw an IOException but returned normally");
  }
Example #4
0
  /**
   * Scans {@code testErrorTolerance1.json} as JSON with {@code TEXT_ERROR_TOLERANCE_MAXNUM} set to
   * {@code "-1"} and expects exactly three tuples, each equal to {@code baseTuple}.
   *
   * @throws IOException if the scanner fails while reading or closing
   */
  @Test
  public void testIgnoreAllErrors() throws IOException {
    TableMeta meta = CatalogUtil.newTableMeta("JSON");
    meta.putOption(StorageUtil.TEXT_ERROR_TOLERANCE_MAXNUM, "-1");
    FileFragment fragment = getFileFragment("testErrorTolerance1.json");
    Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment);
    scanner.init();

    // Close in finally so the scanner is released even when an assertion fails or next() throws.
    try {
      Tuple tuple;
      int i = 0;
      while ((tuple = scanner.next()) != null) {
        assertEquals(baseTuple, tuple);
        i++;
      }
      assertEquals(3, i);
    } finally {
      scanner.close();
    }
  }
Example #5
0
  /**
   * Inserts the row values described by {@code insertNode} and fills {@code responseBuilder} with
   * the outcome.
   *
   * <p>The tablespace is looked up from the node's URI. If its format property reports that direct
   * insertion is supported, an {@code InsertRowsExec} writes to that URI; otherwise a staging
   * space is prepared and the work is handed to {@code insertRowsThroughStaging}. Afterwards the
   * byte and row counts from the task attempt context's result stats are copied into a new {@code
   * TableStats}. When the node has a target table, the catalog's table stats are updated and the
   * response carries that table's description; otherwise the response carries a description with
   * no columns and the TEXT store type.
   *
   * @param queryContext context passed to the task attempt context and to staging preparation
   * @param insertNode plan node supplying the table name, URI, storage type, options and child
   * @param responseBuilder response that receives the table description, result type and state
   * @throws RuntimeException wrapping any {@link Throwable} raised while inserting
   */
  private void insertRowValues(
      QueryContext queryContext,
      InsertNode insertNode,
      SubmitQueryResponse.Builder responseBuilder) {
    try {
      // Prefer the table name; fall back to the last component of the output URI.
      String nodeUniqName;
      if (insertNode.getTableName() != null) {
        nodeUniqName = insertNode.getTableName();
      } else {
        nodeUniqName = new Path(insertNode.getUri()).getName();
      }
      String queryId = nodeUniqName + "_" + System.currentTimeMillis();

      URI finalOutputUri = insertNode.getUri();
      Tablespace space = TablespaceManager.get(finalOutputUri).get();
      TableMeta tableMeta = new TableMeta(insertNode.getStorageType(), insertNode.getOptions());
      tableMeta.putOption(StorageConstants.INSERT_DIRECTLY, Boolean.TRUE.toString());

      TaskAttemptContext taskAttemptContext;
      if (!space.getFormatProperty(tableMeta).directInsertSupported()) {
        // No direct insertion for this format/storage: write into a staging space first.
        Path stagingDir =
            new Path(space.prepareStagingSpace(context.getConf(), queryId, queryContext, tableMeta));
        Path stagingResultDir = new Path(stagingDir, TajoConstants.RESULT_DIR_NAME);

        taskAttemptContext = new TaskAttemptContext(queryContext, null, null, null, stagingDir);
        taskAttemptContext.setOutputPath(new Path(stagingResultDir, "part-01-000000"));
        insertRowsThroughStaging(
            taskAttemptContext, insertNode, new Path(finalOutputUri), stagingDir, stagingResultDir);
      } else {
        // Direct insertion: write to the final output location.
        taskAttemptContext = new TaskAttemptContext(queryContext, null, null, null, null);
        taskAttemptContext.setOutputPath(new Path(finalOutputUri));

        EvalExprNode valuesNode = (EvalExprNode) insertNode.getChild();
        InsertRowsExec exec =
            new InsertRowsExec(
                taskAttemptContext, insertNode, new EvalExprExec(taskAttemptContext, valuesNode));

        try {
          exec.init();
          exec.next();
        } finally {
          exec.close();
        }
      }

      // Record how many bytes and rows the insert produced.
      TableStats stats = new TableStats();
      stats.setNumBytes(taskAttemptContext.getResultStats().getNumBytes());
      stats.setNumRows(taskAttemptContext.getResultStats().getNumRows());

      if (insertNode.hasTargetTable()) {
        catalog.updateTableStats(
            CatalogProtos.UpdateTableStatsProto.newBuilder()
                .setTableName(insertNode.getTableName())
                .setStats(stats.getProto())
                .build());

        TableDesc desc =
            new TableDesc(
                insertNode.getTableName(), insertNode.getTargetSchema(), tableMeta, finalOutputUri);
        responseBuilder.setTableDesc(desc.getProto());
      } else {
        // INSERT INTO LOCATION: respond with a description that has no columns.
        CatalogProtos.TableProto emptyMeta =
            CatalogProtos.TableProto.newBuilder().setStoreType(BuiltinStorages.TEXT).build();
        CatalogProtos.SchemaProto emptySchema =
            CatalogProtos.SchemaProto.newBuilder()
                .addAllFields(new ArrayList<CatalogProtos.ColumnProto>())
                .build();

        responseBuilder.setTableDesc(
            CatalogProtos.TableDescProto.newBuilder()
                .setTableName(nodeUniqName)
                .setMeta(emptyMeta)
                .setSchema(emptySchema)
                .setStats(stats.getProto())
                .build());
      }

      // With queryId == NULL_QUERY_ID and MaxRowNum == -1, TajoCli prints only the number of
      // inserted rows.
      responseBuilder.setMaxRowNum(-1);
      responseBuilder.setQueryId(QueryIdFactory.NULL_QUERY_ID.getProto());
      responseBuilder.setResultType(ResultType.NO_RESULT);
      responseBuilder.setState(OK);
    } catch (Throwable t) {
      throw new RuntimeException(t);
    }
  }