Пример #1
0
 /**
  * Checks how a common order-by containing a schema-order marker is turned into criteria: the
  * fields listed before the marker are asserted as the common criteria, and the field listed
  * after it is asserted as the first element of each schema's specific criteria ("schema1"
  * additionally carries the field of its own specific order-by).
  */
 @Test
 public void testCommonSortByToCriteria() throws TupleMRException {
   TupleMRConfigBuilder builder = new TupleMRConfigBuilder();
   Schema first = new Schema("schema1", Fields.parse("a:int,b:string,c:string,blabla:string"));
   builder.addIntermediateSchema(first);
   Schema second = new Schema("schema2", Fields.parse("a:int,c:string,b:string,bloblo:string"));
   builder.addIntermediateSchema(second);
   builder.setGroupByFields("c", "b");
   builder.setOrderBy(
       new OrderBy()
           .add("b", Order.ASC)
           .add("c", Order.DESC)
           .addSchemaOrder(Order.DESC)
           .add("a", Order.DESC));
   builder.setSpecificOrderBy("schema1", new OrderBy().add("blabla", Order.DESC));

   TupleMRConfig built = builder.buildConf();
   // Result is intentionally discarded; the call itself must not throw.
   built.getSerializationInfo();

   Criteria.NullOrder nulls = Criteria.NullOrder.NULL_SMALLEST;

   // Common criteria: everything declared before the schema-order marker.
   List<SortElement> common = new ArrayList<SortElement>();
   common.add(new SortElement("b", Order.ASC, nulls));
   common.add(new SortElement("c", Order.DESC, nulls));
   Assert.assertEquals(new Criteria(common), built.getCommonCriteria());

   // First schema: trailing common field followed by its specific order-by field.
   List<SortElement> forSchema1 = new ArrayList<SortElement>();
   forSchema1.add(new SortElement("a", Order.DESC, nulls));
   forSchema1.add(new SortElement("blabla", Order.DESC, nulls));
   Assert.assertEquals(new Criteria(forSchema1), built.getSpecificOrderBys().get(0));

   // Second schema: only the trailing common field.
   List<SortElement> forSchema2 = new ArrayList<SortElement>();
   forSchema2.add(new SortElement("a", Order.DESC, nulls));
   Assert.assertEquals(new Criteria(forSchema2), built.getSpecificOrderBys().get(1));
 }
Пример #2
0
 /**
  * Expects a {@link TupleMRException} when two intermediate schemas are registered under the
  * same name ("schema1").
  */
 @Test(expected = TupleMRException.class)
 public void testRepeatedSchemas() throws TupleMRException {
   TupleMRConfigBuilder builder = new TupleMRConfigBuilder();
   Schema original = new Schema("schema1", Fields.parse("a:int,b:string"));
   builder.addIntermediateSchema(original);
   Schema sameName = new Schema("schema1", Fields.parse("c:int,b:string"));
   builder.addIntermediateSchema(sameName);
   builder.setGroupByFields("b");
   builder.buildConf();
 }
Пример #3
0
 /**
  * Expects a {@link TupleMRException} when a group-by field ("b") is declared as string in one
  * schema and boolean in the other.
  */
 @Test(expected = TupleMRException.class)
 public void testGroupByFieldWithDifferentTypes() throws TupleMRException {
   TupleMRConfigBuilder builder = new TupleMRConfigBuilder();
   Schema withStringB = new Schema("schema1", Fields.parse("a:int,b:string"));
   builder.addIntermediateSchema(withStringB);
   Schema withBooleanB = new Schema("schema2", Fields.parse("a:int,b:boolean"));
   builder.addIntermediateSchema(withBooleanB);
   builder.setGroupByFields("b", "a");
   builder.buildConf();
 }
Пример #4
0
 /**
  * Expects a {@link TupleMRException} when a schema-specific order-by is set while no common
  * order-by has been declared.
  */
 @Test(expected = TupleMRException.class)
 public void testNeedToDeclareCommonOrderWhenSecondary() throws TupleMRException {
   TupleMRConfigBuilder builder = new TupleMRConfigBuilder();
   Schema first = new Schema("schema1", Fields.parse("a:int,b:string"));
   builder.addIntermediateSchema(first);
   Schema second = new Schema("schema2", Fields.parse("a:int,b:string"));
   builder.addIntermediateSchema(second);
   builder.setGroupByFields("b");
   // No setOrderBy(...) call on purpose: only the specific order is provided.
   builder.setSpecificOrderBy("schema1", new OrderBy().add("a", Order.ASC));
   builder.buildConf();
 }
Пример #5
0
 /**
  * Expects a {@link TupleMRException} when the group-by fields are "a", "b" but the common
  * order-by places the schema-order marker between "b" and "a".
  */
 @Test(expected = TupleMRException.class)
 public void testCommonOrderPrefixGroupBy2() throws TupleMRException {
   TupleMRConfigBuilder builder = new TupleMRConfigBuilder();
   Schema first = new Schema("schema1", Fields.parse("a:int,b:string,c:string"));
   builder.addIntermediateSchema(first);
   Schema second = new Schema("schema2", Fields.parse("a:int,b:string,d:string"));
   builder.addIntermediateSchema(second);
   builder.setGroupByFields("a", "b");
   builder.setOrderBy(
       new OrderBy()
           .add("b", Order.ASC)
           .addSchemaOrder(Order.DESC)
           .add("a", Order.DESC));
   builder.buildConf();
 }
Пример #6
0
  /**
   * Same criteria expectations as the non-aliased common-sort test, but both schemas use
   * suffixed field names ("ax", "ay", ...) that are mapped to shared names through aliases.
   * A custom partition field ("p") is also configured; the common schema and the partition
   * field indexes are printed to standard output but not asserted.
   */
  @Test
  public void testAliases1() throws TupleMRException {
    TupleMRConfigBuilder builder = new TupleMRConfigBuilder();
    Schema first =
        new Schema("schema1", Fields.parse("ax:int,bx:string,cx:string,blablax:string,p2:string"));
    builder.addIntermediateSchema(first);
    Schema second =
        new Schema("schema2", Fields.parse("ay:int,cy:string,by:string,blobloy:string,p:string"));
    builder.addIntermediateSchema(second);

    // Aliases for the first schema.
    Aliases schema1Aliases = new Aliases();
    schema1Aliases.add("a", "ax");
    schema1Aliases.add("b", "bx");
    schema1Aliases.add("c", "cx");
    schema1Aliases.add("blabla", "blablax");
    schema1Aliases.add("p", "p2");
    builder.setFieldAliases("schema1", schema1Aliases);

    // Aliases for the second schema ("p" needs none there).
    Aliases schema2Aliases = new Aliases();
    schema2Aliases.add("a", "ay");
    schema2Aliases.add("b", "by");
    schema2Aliases.add("c", "cy");
    schema2Aliases.add("bloblo", "blobloy");
    builder.setFieldAliases("schema2", schema2Aliases);

    builder.setGroupByFields("c", "b");
    builder.setOrderBy(
        new OrderBy()
            .add("b", Order.ASC)
            .add("c", Order.DESC)
            .addSchemaOrder(Order.DESC)
            .add("a", Order.DESC));
    builder.setSpecificOrderBy("schema1", new OrderBy().add("blabla", Order.DESC));
    builder.setCustomPartitionFields("p");

    TupleMRConfig built = builder.buildConf();
    SerializationInfo serialization = built.getSerializationInfo();
    System.out.println(serialization.getCommonSchema());
    System.out.println(serialization.getPartitionFieldsIndexes());

    Criteria.NullOrder nulls = Criteria.NullOrder.NULL_SMALLEST;

    List<SortElement> common = new ArrayList<SortElement>();
    common.add(new SortElement("b", Order.ASC, nulls));
    common.add(new SortElement("c", Order.DESC, nulls));
    Assert.assertEquals(new Criteria(common), built.getCommonCriteria());

    List<SortElement> forSchema1 = new ArrayList<SortElement>();
    forSchema1.add(new SortElement("a", Order.DESC, nulls));
    forSchema1.add(new SortElement("blabla", Order.DESC, nulls));
    Assert.assertEquals(new Criteria(forSchema1), built.getSpecificOrderBys().get(0));

    List<SortElement> forSchema2 = new ArrayList<SortElement>();
    forSchema2.add(new SortElement("a", Order.DESC, nulls));
    Assert.assertEquals(new Criteria(forSchema2), built.getSpecificOrderBys().get(1));
  }
Пример #7
0
 /**
  * Expects a {@link TupleMRException} when the same field ("b") appears both in the common
  * order-by and in a schema-specific order-by.
  */
 @Test(expected = TupleMRException.class)
 public void testFieldsRepeatedInCommonAndSecondaryOrder() throws TupleMRException {
   TupleMRConfigBuilder builder = new TupleMRConfigBuilder();
   Schema first = new Schema("schema1", Fields.parse("a:int,b:string"));
   builder.addIntermediateSchema(first);
   Schema second = new Schema("schema2", Fields.parse("a:int,b:string"));
   builder.addIntermediateSchema(second);
   builder.setGroupByFields("b");
   builder.setOrderBy(
       new OrderBy()
           .add("b", Order.DESC)
           .addSchemaOrder(Order.DESC));
   // "b" is already part of the common order above.
   builder.setSpecificOrderBy("schema1", new OrderBy().add("b", Order.ASC));
   builder.buildConf();
 }
Пример #8
0
 /**
  * Builds a two-schema configuration whose common order-by is "a", the schema-order marker,
  * then "b", and checks that both building it and fetching its serialization info complete
  * without an exception.
  */
 @Test
 public void testCorrect2() throws TupleMRException {
   TupleMRConfigBuilder builder = new TupleMRConfigBuilder();
   Schema first = new Schema("schema1", Fields.parse("a:int,b:string"));
   builder.addIntermediateSchema(first);
   Schema second = new Schema("schema2", Fields.parse("a:int,b:string"));
   builder.addIntermediateSchema(second);
   builder.setGroupByFields("a");
   builder.setOrderBy(
       new OrderBy()
           .add("a", Order.ASC)
           .addSchemaOrder(Order.DESC)
           .add("b", Order.DESC));
   // No assertions: the test passes as long as nothing throws.
   builder.buildConf().getSerializationInfo();
 }
Пример #9
0
 /**
  * Expects a {@link TupleMRException} when the custom partition field ("b") is a string in one
  * schema and a long in the other.
  */
 @Test(expected = TupleMRException.class)
 public void testCustomPartitionFieldsPresentWithSameType() throws TupleMRException {
   TupleMRConfigBuilder builder = new TupleMRConfigBuilder();
   Schema withStringB = new Schema("schema1", Fields.parse("a:int,b:string"));
   builder.addIntermediateSchema(withStringB);
   Schema withLongB = new Schema("schema2", Fields.parse("a:int,b:long"));
   builder.addIntermediateSchema(withLongB);
   builder.setGroupByFields("a");
   builder.setOrderBy(new OrderBy().add("a", Order.ASC));
   builder.setCustomPartitionFields("b");
   builder.buildConf();
 }
Пример #10
0
 /** Expects a {@link TupleMRException} when {@code null} is given as the rollup-from field. */
 @Test(expected = TupleMRException.class)
 public void testRollUpCantBeNull() throws TupleMRException {
   TupleMRConfigBuilder builder = new TupleMRConfigBuilder();
   Schema first = new Schema("schema1", Fields.parse("a:int,b:string"));
   builder.addIntermediateSchema(first);
   Schema second = new Schema("schema2", Fields.parse("a:int,b:string"));
   builder.addIntermediateSchema(second);
   builder.setGroupByFields("b");
   builder.setOrderBy(new OrderBy().add("b", Order.DESC));
   builder.setRollupFrom(null);
   builder.buildConf();
 }
Пример #11
0
 /**
  * Currently ignored. The annotation declares that an {@link UnsupportedOperationException} is
  * expected, but the body only builds a configuration; whatever should trigger the exception
  * has not been written yet (see the TODO).
  */
 @Ignore
 @Test(expected = UnsupportedOperationException.class)
 public void testNotMutableConfig() throws TupleMRException {
   TupleMRConfigBuilder builder = new TupleMRConfigBuilder();
   Schema first = new Schema("schema1", Fields.parse("a:int,b:string"));
   builder.addIntermediateSchema(first);
   Schema second = new Schema("schema2", Fields.parse("b:string,a:int"));
   builder.addIntermediateSchema(second);
   builder.setGroupByFields("a");
   builder.setOrderBy(new OrderBy().add("a", Order.ASC));
   builder.setCustomPartitionFields("b");
   builder.buildConf(); // TODO
 }
Пример #12
0
 /**
  * Expects a {@link TupleMRException} when the common order-by sorts by a field ("b") whose
  * type differs between schemas, even though that field is placed after the schema-order
  * marker.
  */
 @Test(expected = TupleMRException.class)
 public void testSortFieldWithDifferentTypes1() throws TupleMRException {
   TupleMRConfigBuilder builder = new TupleMRConfigBuilder();
   Schema withStringB = new Schema("schema1", Fields.parse("a:int,b:string"));
   builder.addIntermediateSchema(withStringB);
   Schema withBooleanB = new Schema("schema2", Fields.parse("a:int,b:boolean"));
   builder.addIntermediateSchema(withBooleanB);
   builder.setGroupByFields("a");
   // Sorting in the common order by a field with differing types is not allowed, not even
   // after the source order, because it can be confusing.
   builder.setOrderBy(
       new OrderBy()
           .add("a", Order.ASC)
           .addSchemaOrder(Order.DESC)
           .add("b", Order.DESC));
   builder.buildConf();
 }
Пример #13
0
 /**
  * Expects a {@link TupleMRException} when a schema-specific order-by contains a schema-order
  * marker.
  */
 @Test(expected = TupleMRException.class)
 public void testNotAllowedSourceOrderInSecondaryOrder() throws TupleMRException {
   TupleMRConfigBuilder builder = new TupleMRConfigBuilder();
   Schema first = new Schema("schema1", Fields.parse("a:int,b:string"));
   builder.addIntermediateSchema(first);
   Schema second = new Schema("schema2", Fields.parse("c:int,b:string"));
   builder.addIntermediateSchema(second);
   builder.setGroupByFields("b");
   builder.setOrderBy(
       new OrderBy()
           .add("b", Order.DESC)
           .addSchemaOrder(Order.DESC));
   // The schema-order marker inside a specific order-by is the incorrect part.
   builder.setSpecificOrderBy(
       "schema1",
       new OrderBy()
           .add("a", Order.DESC)
           .addSchemaOrder(Order.DESC));
   builder.buildConf();
 }
Пример #14
0
 /**
  * Configures "b" as custom partition field for two schemas that declare it at different
  * positions, and asserts the partition field indexes reported for each schema: index 1 for
  * the first schema and index 0 for the second. The built configuration is also printed to
  * standard output.
  */
 @Test
 public void testCustomPartition() throws TupleMRException {
   TupleMRConfigBuilder builder = new TupleMRConfigBuilder();
   Schema bSecond = new Schema("schema1", Fields.parse("a:int,b:string"));
   builder.addIntermediateSchema(bSecond);
   Schema bFirst = new Schema("schema2", Fields.parse("b:string,a:int"));
   builder.addIntermediateSchema(bFirst);
   builder.setGroupByFields("a");
   builder.setOrderBy(new OrderBy().add("a", Order.ASC));
   builder.setCustomPartitionFields("b");

   TupleMRConfig built = builder.buildConf();
   System.out.println(built);

   SerializationInfo serialization = built.getSerializationInfo();
   int[] partitionIndexesSchema1 = serialization.getFieldsToPartition(0);
   int[] partitionIndexesSchema2 = serialization.getFieldsToPartition(1);
   Assert.assertArrayEquals(new int[] {1}, partitionIndexesSchema1);
   Assert.assertArrayEquals(new int[] {0}, partitionIndexesSchema2);
 }
Пример #15
0
 /**
  * Expects a {@link TupleMRException} when field aliases are set for a schema name ("schemaX")
  * that was never added to the builder.
  */
 @Test(expected = TupleMRException.class)
 public void testAliasesUnknownSchema() throws TupleMRException {
   TupleMRConfigBuilder builder = new TupleMRConfigBuilder();
   Schema onlySchema = new Schema("schema1", Fields.parse("a:int,b:string,c:string,blabla:string"));
   builder.addIntermediateSchema(onlySchema);
   builder.setFieldAliases("schemaX", new Aliases().add("bx", "b"));
 }
Пример #16
0
  /**
   * Wraps a partially-null {@link Tuple} in a {@link NullableTuple}, checks that every position
   * reads back the value (or null) that was stored, then overwrites all positions through the
   * wrapper, swapping which ones are null, and checks the values again.
   */
  @Test
  public void testModifying() {
    Schema schema =
        new Schema(
            "testSchema", Fields.parse("a:string, b:int, c:double, d:float, e:boolean, f:long"));
    ITuple wrapped = new Tuple(schema);
    wrapped.set(0, "foo");
    wrapped.set(1, null);
    wrapped.set(2, 20.0);
    wrapped.set(3, null);
    wrapped.set(4, false);
    wrapped.set(5, null);

    NullableTuple nullable = new NullableTuple(wrapped);

    // Position 0 is compared through toString(); the remaining positions are compared directly.
    assertEquals("foo", nullable.getNullable(0).toString());
    Object[] expectedInitial = {null, 20.0, null, false, null};
    for (int i = 0; i < expectedInitial.length; i++) {
      assertEquals(expectedInitial[i], nullable.getNullable(i + 1));
    }

    // Flip the null pattern: previously set positions become null and vice versa.
    nullable.set(2, null);
    nullable.set(4, null);
    nullable.set(0, null);
    nullable.set(1, 10);
    nullable.set(3, 20.0f);
    nullable.set(5, 30L);

    Object[] expectedAfterUpdate = {null, 10, null, 20.0f, null, 30L};
    for (int i = 0; i < expectedAfterUpdate.length; i++) {
      assertEquals(expectedAfterUpdate[i], nullable.getNullable(i));
    }
  }
Пример #17
0
 /**
  * Expects a {@link TupleMRException} when the common order-by contains a schema-order marker
  * although only a single intermediate schema is defined.
  */
 @Test(expected = TupleMRException.class)
 public void testNotAllowedSourceOrderInOneSource() throws TupleMRException {
   TupleMRConfigBuilder builder = new TupleMRConfigBuilder();
   Schema onlySchema = new Schema("schema1", Fields.parse("a:int,b:string"));
   builder.addIntermediateSchema(onlySchema);
   builder.setGroupByFields("a");
   builder.setOrderBy(
       new OrderBy()
           .add("a", Order.ASC)
           .addSchemaOrder(Order.DESC));
   builder.buildConf();
 }
Пример #18
0
 /**
  * Builds the smallest configuration used in these tests (one schema, one group-by field, no
  * explicit order-by) and checks that building it and fetching its serialization info complete
  * without an exception.
  */
 @Test
 public void testCorrectMinimal() throws TupleMRException {
   TupleMRConfigBuilder builder = new TupleMRConfigBuilder();
   Schema onlySchema = new Schema("schema1", Fields.parse("a:int,b:string"));
   builder.addIntermediateSchema(onlySchema);
   builder.setGroupByFields("a");
   // No assertions: the test passes as long as nothing throws.
   builder.buildConf().getSerializationInfo();
 }
Пример #19
0
 /**
  * Expects a {@link TupleMRException} when a rollup-from field is set without any explicit
  * order-by.
  */
 @Test(expected = TupleMRException.class)
 public void testRollupNeedsExplicitSortBy() throws TupleMRException {
   TupleMRConfigBuilder builder = new TupleMRConfigBuilder();
   Schema onlySchema = new Schema("schema1", Fields.parse("a:int,b:string"));
   builder.addIntermediateSchema(onlySchema);
   builder.setGroupByFields("b", "a");
   // No setOrderBy(...) call on purpose.
   builder.setRollupFrom("a");
   builder.buildConf();
 }
Пример #20
0
  /**
   * Creates the three schemas shared by the tests: {@code schema1} and {@code schema2} are
   * parsed from field-definition strings, while {@code schema3} is assembled field by field and
   * also contains an enum field and an object field.
   */
  @Before
  public void init() throws TupleMRException {
    schema1 =
        new Schema(
            "schema1", Fields.parse("int_field:int, string_field:string,boolean_field:boolean"));
    schema2 =
        new Schema("schema2", Fields.parse("long_field:long,boolean_field:boolean, int_field:int"));

    List<Field> schema3Fields = new ArrayList<Field>();
    // Primitive-like fields.
    schema3Fields.add(Field.create("int_field", Type.INT));
    schema3Fields.add(Field.create("string_field", Type.STRING));
    schema3Fields.add(Field.create("long_field", Type.LONG));
    schema3Fields.add(Field.create("float_field", Type.FLOAT));
    schema3Fields.add(Field.create("double_field", Type.DOUBLE));
    schema3Fields.add(Field.create("boolean_field", Type.BOOLEAN));
    // Fields backed by a Java class.
    schema3Fields.add(Field.createEnum("enum_field", Order.class));
    schema3Fields.add(Field.createObject("thrift_field", A.class));
    schema3 = new Schema("schema3", schema3Fields);
  }
Пример #21
0
 /**
  * Expects a {@link TupleMRException} when a {@code null} array is passed as the custom
  * partition fields.
  */
 @Test(expected = TupleMRException.class)
 public void testCustomPartitionFieldsNotNull() throws TupleMRException {
   TupleMRConfigBuilder builder = new TupleMRConfigBuilder();
   Schema onlySchema = new Schema("schema1", Fields.parse("a:int,b:string"));
   builder.addIntermediateSchema(onlySchema);
   builder.setGroupByFields("a");
   builder.setOrderBy(new OrderBy().add("a", Order.ASC));
   // The cast keeps the argument typed as a String[] whose value is null.
   builder.setCustomPartitionFields((String[]) null);
   builder.buildConf();
 }
Пример #22
0
  /**
   * Uses a {@link NullableTuple} created directly from a schema (no wrapped tuple), writing
   * values by position and by field name and reading them back both ways.
   */
  @Test
  public void testDirectUseNoWrapper() {
    Schema threeFields = new Schema("testSchema", Fields.parse("a:string, b:int, c:double"));
    NullableTuple direct = new NullableTuple(threeFields);

    direct.set(0, "foo");
    direct.set("b", null);
    direct.set("c", 20.0);

    assertEquals("foo", direct.getNullable("a").toString());
    assertEquals(null, direct.getNullable(1));
    assertEquals(20.0, direct.getNullable(2));
  }
Пример #23
0
  /**
   * Round-trips a configuration that uses field aliases through a Hadoop {@link Configuration}
   * twice and asserts that each deserialized copy equals the configuration originally built.
   * The original and the first deserialized copy are printed to standard output.
   */
  @Test
  public void testWithFieldAliases() throws TupleMRException, IOException {
    TupleMRConfigBuilder builder = new TupleMRConfigBuilder();
    Schema first =
        new Schema(
            "schema1",
            Fields.parse("user_id:int,operation:string,age:long,timestamp:int,country:string"));
    Schema second =
        new Schema("schema2", Fields.parse("id:int,op:string,another_id:int,time:int"));

    builder.addIntermediateSchema(first);
    builder.addIntermediateSchema(second);
    builder.setFieldAliases("schema1", new Aliases().add("id", "user_id").add("op", "operation"));
    builder.setFieldAliases("schema2", new Aliases().add("timestamp", "time"));
    builder.setGroupByFields("id", "op");
    builder.setOrderBy(
        new OrderBy()
            .add("op", Order.ASC)
            .add("id", Order.DESC)
            .addSchemaOrder(Order.DESC)
            .add("timestamp", Order.DESC));
    builder.setSpecificOrderBy("schema1", new OrderBy().add("country", Order.DESC));

    TupleMRConfig original = builder.buildConf();

    // First round trip: original -> Hadoop configuration -> copy.
    Configuration firstCarrier = new Configuration();
    TupleMRConfig.set(original, firstCarrier);
    TupleMRConfig firstCopy = TupleMRConfig.get(firstCarrier);
    System.out.println(original);
    System.out.println("------------");
    System.out.println(firstCopy);
    Assert.assertEquals(original, firstCopy);

    // Second round trip starts from the copy and must still equal the original.
    Configuration secondCarrier = new Configuration();
    TupleMRConfig.set(firstCopy, secondCarrier);
    TupleMRConfig secondCopy = TupleMRConfig.get(secondCarrier);
    Assert.assertEquals(original, secondCopy);
  }
Пример #24
0
  /**
   * Runs a job configured with two intermediate schemas ("country" and "user"), where the
   * "user" schema exposes its {@code my_country} field under the alias {@code country} so that
   * both schemas can be grouped by it.
   *
   * <p>The test writes a small text file to {@code test-input}, runs the job with output in
   * {@code test-output}, and removes both paths afterwards. The removal happens in a
   * {@code finally} block so the fixtures are not left behind when the job or an assertion
   * fails.
   *
   * @throws TupleMRException if the builder rejects the configuration
   * @throws IOException if a file system operation fails
   * @throws InterruptedException if the job execution is interrupted
   * @throws ClassNotFoundException if a class needed by the job cannot be loaded
   */
  @Test
  public void test()
      throws TupleMRException, IOException, InterruptedException, ClassNotFoundException {
    CommonUtils.writeTXT("foo", new File("test-input"));
    try {
      HadoopUtils.deleteIfExists(FileSystem.get(getConf()), new Path("test-output"));

      TupleMRBuilder builder = new TupleMRBuilder(getConf());
      builder.addIntermediateSchema(
          new Schema("country", Fields.parse("country:string, averageSalary:int")));
      builder.addIntermediateSchema(
          new Schema("user", Fields.parse("name:string, money:int, my_country:string")));

      builder.setFieldAliases("user", new Aliases().add("country", "my_country"));
      builder.setGroupByFields("country");
      builder.setOrderBy(new OrderBy().add("country", Order.ASC).addSchemaOrder(Order.DESC));
      builder.setSpecificOrderBy("user", new OrderBy().add("money", Order.ASC));

      builder.addInput(
          new Path("test-input"),
          new HadoopInputFormat(TextInputFormat.class),
          new FirstInputProcessor());
      builder.setTupleReducer(new MyGroupHandler());
      builder.setOutput(
          new Path("test-output"),
          new HadoopOutputFormat(TextOutputFormat.class),
          NullWritable.class,
          NullWritable.class);

      Job job = builder.createJob();
      try {
        assertRun(job);
      } finally {
        builder.cleanUpInstanceFiles();
      }
    } finally {
      // Always remove the fixtures, also when the job or an assertion above failed.
      HadoopUtils.deleteIfExists(FileSystem.get(getConf()), new Path("test-output"));
      HadoopUtils.deleteIfExists(FileSystem.get(getConf()), new Path("test-input"));
    }
  }
Пример #25
0
  /**
   * Wraps a {@link NullableTuple} inside another {@link NullableTuple} and checks that the
   * outer one reads back the same values and reports a schema with the same number of fields.
   */
  @Test
  public void testNullableOfNullable() {
    Schema threeFields = new Schema("testSchema", Fields.parse("a:string, b:int, c:double"));
    NullableTuple inner = new NullableTuple(threeFields);
    inner.set(0, "foo");
    inner.set("b", null);
    inner.set("c", 20.0);

    NullableTuple outer = new NullableTuple(inner);

    assertEquals("foo", outer.getNullable("a").toString());
    assertEquals(null, outer.getNullable(1));
    assertEquals(20.0, outer.getNullable(2));

    // Wrapping twice must not change the number of fields in the schema.
    assertEquals(inner.getSchema().getFields().size(), outer.getSchema().getFields().size());
  }
Пример #26
0
 /**
  * Builds a configuration without an explicit order-by and asserts that the common criteria
  * consist of the group-by fields, in the order they were given, each ascending.
  */
 @Test
 public void testCommonOrderGeneratedImplicitlyFromGroupFields() throws TupleMRException {
   TupleMRConfigBuilder builder = new TupleMRConfigBuilder();
   Schema onlySchema = new Schema("schema1", Fields.parse("a:int,b:string,c:string,blabla:string"));
   builder.addIntermediateSchema(onlySchema);
   builder.setGroupByFields("c", "b");

   TupleMRConfig built = builder.buildConf();
   // Result is intentionally discarded; the call itself must not throw.
   built.getSerializationInfo();

   Criteria.NullOrder nulls = Criteria.NullOrder.NULL_SMALLEST;
   List<SortElement> common = new ArrayList<SortElement>();
   common.add(new SortElement("c", Order.ASC, nulls));
   common.add(new SortElement("b", Order.ASC, nulls));
   Assert.assertEquals(new Criteria(common), built.getCommonCriteria());
 }
Пример #27
0
  /**
   * Entry point of the "Splout Page Counts example" command-line tool.
   *
   * <p>The method parses {@code args} with JCommander and then, depending on the parsed flags:
   *
   * <ul>
   *   <li>unless {@code noGenerate} is set, builds a "pagecounts" table from every file found
   *       under {@code inputPath} and generates the tablespace into {@code outputPath} (with
   *       {@code generateTupleFiles} set, a tuple output is written instead of the regular
   *       store output);
   *   <li>if {@code deploy} is set, deploys the generated "pagecounts" tablespace through a
   *       {@code StoreDeployerTool}.
   * </ul>
   *
   * <p>Invalid arguments, or combining {@code generateTupleFiles} with {@code deploy}, print
   * the usage and terminate the JVM via {@code System.exit(-1)}.
   *
   * <p>NOTE(review): the method returns 1 on the normal path. If this overrides Hadoop's
   * {@code Tool.run}, the return value is conventionally an exit code where 0 means success —
   * confirm what callers expect before changing it.
   *
   * @param args command-line arguments, parsed by JCommander into fields of this object
   * @return always 1 when the method completes normally
   * @throws Exception if any file system, generation or deployment step fails
   */
  @Override
  public int run(String[] args) throws Exception {
    // Validate params etc
    JCommander jComm = new JCommander(this);
    jComm.setProgramName("Splout Page Counts example");
    try {
      jComm.parse(args);
    } catch (ParameterException e) {
      System.err.println(e.getMessage());
      jComm.usage();
      System.exit(-1);
    }

    boolean generate = !noGenerate; // just for clarifying

    // A tuple-file ("dry") generation does not produce deployable stores, so both flags together
    // are rejected.
    if (generateTupleFiles && deploy) {
      System.err.println("Can't run a 'dry' TupleFile generation and deploy it.");
      jComm.usage();
      System.exit(-1);
    }

    Path outPath = new Path(outputPath);
    FileSystem outFs = outPath.getFileSystem(getConf());

    // When the default file system is not the local one, register the native libraries (if a
    // local "native" directory exists).
    // NOTE(review): this section reads `conf` directly while the rest uses getConf() — presumably
    // the same object; confirm.
    if (!FileSystem.getLocal(conf).equals(FileSystem.get(conf))) {
      File nativeLibs = new File("native");
      if (nativeLibs.exists()) {
        SploutHadoopConfiguration.addSQLite4JavaNativeLibsToDC(conf);
      }
    }

    if (generate) {
      Path inputPath = new Path(this.inputPath);
      FileSystem inputFileSystem = inputPath.getFileSystem(conf);

      FileStatus[] fileStatuses = inputFileSystem.listStatus(inputPath);

      // define the schema that the resultant table will have: date, hour, pagename, pageviews
      final Schema tableSchema =
          new Schema(
              "pagecounts",
              Fields.parse("date:string, hour:string, pagename:string, pageviews:int"));
      // define the schema of the input files: projectcode, pagename, pageviews, bytes
      Schema fileSchema =
          new Schema(
              "pagecountsfile",
              Fields.parse("projectcode:string, pagename:string, pageviews:int, bytes:long"));

      // instantiate a TableBuilder
      TableBuilder tableBuilder = new TableBuilder(tableSchema);

      // for every input file...
      for (FileStatus fileStatus : fileStatuses) {
        String fileName = fileStatus.getPath().getName().toString();
        // strip the date and the hour from the file name
        // (the name is split on "-": the second piece is used as the date and the first two
        // characters of the third piece as the hour; a name with fewer pieces would fail here)
        String fileDate = fileName.split("-")[1];
        String fileHour = fileName.split("-")[2].substring(0, 2);
        // instantiate a custom RecordProcessor to process the records of this file
        PageCountsRecordProcessor recordProcessor =
            new PageCountsRecordProcessor(tableSchema, fileDate, fileHour);
        // use the tableBuilder method for adding each of the files to the mix
        tableBuilder.addCSVTextFile(
            fileStatus.getPath(),
            ' ',
            TupleTextInputFormat.NO_QUOTE_CHARACTER,
            TupleTextInputFormat.NO_ESCAPE_CHARACTER,
            false,
            false,
            TupleTextInputFormat.NO_NULL_STRING,
            fileSchema,
            recordProcessor);
      }

      // partition the dataset by pagename - which should give a fair even distribution.
      tableBuilder.partitionBy("pagename");
      // create a compound index on pagename, date so that typical queries for the dataset will be
      // fast
      tableBuilder.createIndex("pagename", "date");

      // Derive the page size: divide the indexing memory by 32000 and round the result to the
      // nearest power of two (rounding is done on the base-2 logarithm).
      // NOTE(review): if memoryForIndexing is below 32000 the quotient is 0 and log(0) is
      // -Infinity, which ends up as a page size of 1 — confirm that inputs are always larger.
      long nonExactPageSize = memoryForIndexing / 32000; // number of pages
      int pageSize = (int) Math.pow(2, (int) Math.round(Math.log(nonExactPageSize) / Math.log(2)));
      Log.info(
          "Pagesize = "
              + pageSize
              + " as memory for indexing was ["
              + memoryForIndexing
              + "] and there are 32000 pages.");

      tableBuilder.initialSQL("pragma page_size=" + pageSize);
      // insertion order is very important for optimizing query speed because it makes data be
      // co-located in disk
      tableBuilder.insertionSortOrder(OrderBy.parse("pagename:asc, date:asc"));

      // instantiate a TablespaceBuilder
      TablespaceBuilder tablespaceBuilder = new TablespaceBuilder();

      // we will partition this dataset in as many partitions as:
      tablespaceBuilder.setNPartitions(nPartitions);
      tablespaceBuilder.add(tableBuilder.build());
      // we turn a specific SQLite pragma on for making autocomplete queries fast
      tablespaceBuilder.initStatements("pragma case_sensitive_like=true;");

      // start from a clean output location
      HadoopUtils.deleteIfExists(outFs, outPath);

      // finally, instantiate a TablespaceGenerator and execute it
      TablespaceGenerator tablespaceViewBuilder;

      if (generateTupleFiles) {
        // we subclass TablespaceGenerator to be able to run the generation without outputting the
        // SQLite stores, for
        // benchmark comparisons.
        // In the future this feature may be useful in general for debugging store creation.
        tablespaceViewBuilder =
            new TablespaceGenerator(tablespaceBuilder.build(), outPath) {

              /**
               * Prepares the output, computes the partition map (sampling only when there is
               * more than one partition), writes the output metadata and runs the job with a
               * tuple output in place of the default one.
               */
              @Override
              public void generateView(
                  Configuration conf, SamplingType samplingType, SamplingOptions samplingOptions)
                  throws Exception {

                prepareOutput(conf);
                final int nPartitions = tablespace.getnPartitions();
                if (nPartitions > 1) {
                  partitionMap = sample(nPartitions, conf, samplingType, samplingOptions);
                } else {
                  // a single partition needs no sampling
                  partitionMap = PartitionMap.oneShardOpenedMap();
                }
                writeOutputMetadata(conf);

                TupleMRBuilder builder = createMRBuilder(nPartitions, conf);
                // Set a TupleOutput here instead of SQLiteOutput
                builder.setOutput(
                    new Path(outputPath, OUT_STORE),
                    new TupleOutputFormat(tableSchema),
                    ITuple.class,
                    NullWritable.class);
                Job job = builder.createJob();
                executeViewGeneration(job);
              }
            };
      } else {
        // ... otherwise a standard TablespaceGenerator is used.
        tablespaceViewBuilder = new TablespaceGenerator(tablespaceBuilder.build(), outPath);
      }

      tablespaceViewBuilder.generateView(
          getConf(), SamplingType.RESERVOIR, new TupleSampler.DefaultSamplingOptions());
    }

    if (deploy) {
      // use StoreDeployerTool for deploying the already generated dataset
      StoreDeployerTool deployer = new StoreDeployerTool(qnode, getConf());
      ArrayList<TablespaceDepSpec> deployments = new ArrayList<TablespaceDepSpec>();
      deployments.add(new TablespaceDepSpec("pagecounts", outPath.toString(), repFactor, null));
      deployer.deploy(deployments);
    }
    // NOTE(review): 1 is returned on the normal path; see the method documentation.
    return 1;
  }