@Test
public void testCrossValidationOneSchema() throws TupleMRException, IOException {
  Configuration conf = getConf();
  int maxIndex = SCHEMA.getFields().size() - 1;

  for (int randomSchema = 0; randomSchema < MAX_RANDOM_SCHEMAS; randomSchema++) {
    Schema schema = permuteSchema(SCHEMA);
    OrderBy sortCriteria = createRandomSortCriteria(schema, maxIndex + 1);
    // TODO could we get empty group fields ??
    String[] groupFields = getFirstFields(sortCriteria,
        1 + random.nextInt(sortCriteria.getElements().size() - 1));
    ITuple[] tuples = new ITuple[] { new Tuple(schema), new Tuple(schema) };
    for (ITuple tuple : tuples) {
      fillTuple(false, tuple, 0, maxIndex);
    }

    for (int minIndex = maxIndex; minIndex >= 0; minIndex--) {
      /* trick for speeding up the tests */
      DCUtils.cleanupTemporaryInstanceCache(conf, "comparator.dat");

      TupleMRConfigBuilder builder = new TupleMRConfigBuilder();
      builder.addIntermediateSchema(schema);
      builder.setGroupByFields(groupFields);
      builder.setOrderBy(sortCriteria);
      TupleMRConfig tupleMRConf = builder.buildConf();
      TupleMRConfig.set(tupleMRConf, conf);
      // tupleMRConf has changed -> we need a new Serialization object
      ser = new HadoopSerialization(conf);

      SortComparator sortComparator = new SortComparator();
      GroupComparator groupComparator = new GroupComparator();
      sortComparator.setConf(conf);
      groupComparator.setConf(conf);

      for (ITuple tuple : tuples) {
        fillTuple(true, tuple, minIndex, maxIndex);
      }

      for (int indexTuple1 = 0; indexTuple1 < tuples.length; indexTuple1++) {
        for (int indexTuple2 = indexTuple1 + 1; indexTuple2 < tuples.length; indexTuple2++) {
          ITuple tuple1 = tuples[indexTuple1];
          ITuple tuple2 = tuples[indexTuple2];
          assertSameComparison("Sort comparator", sortComparator, tuple1, tuple2);
          assertOppositeOrEqualsComparison(sortComparator, tuple1, tuple2);
          assertSameComparison("Group comparator", groupComparator, tuple1, tuple2);
          assertOppositeOrEqualsComparison(groupComparator, tuple1, tuple2);
        }
      }
    }
  }
}
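// NOTE: hypothetical sketch, not the actual Pangool test helper. The assertion helpers used
// above (assertSameComparison, assertOppositeOrEqualsComparison) are not shown in this listing.
// This version only assumes SortComparator/GroupComparator can be treated as an object-level
// java.util.Comparator over the tuples, and checks antisymmetry: swapping the arguments must
// flip the sign of the comparison (or keep it at zero for equal tuples).
protected static <T> void assertOppositeOrEqualsComparison(java.util.Comparator<T> comparator,
    T tuple1, T tuple2) {
  int forward = comparator.compare(tuple1, tuple2);
  int backward = comparator.compare(tuple2, tuple1);
  org.junit.Assert.assertEquals(Integer.signum(forward), -Integer.signum(backward));
}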
protected static String[] getFirstFields(OrderBy sortCriteria, int numFields) {
  String[] result = new String[numFields];
  for (int i = 0; i < numFields; i++) {
    SortElement element = sortCriteria.getElements().get(i);
    result[i] = element.getName();
  }
  return result;
}
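// NOTE: hypothetical sketch of the permuteSchema helper referenced by the test above, not the
// original Pangool code. It assumes Schema exposes getName() and getFields() and can be rebuilt
// from a field list, and simply returns a copy of the schema with the field order shuffled,
// reusing the test's `random` instance.
protected static Schema permuteSchema(Schema schema) {
  java.util.List<Field> fields = new java.util.ArrayList<Field>(schema.getFields());
  java.util.Collections.shuffle(fields, random);
  return new Schema(schema.getName(), fields);
}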
@Override
public int run(String[] args) throws Exception {
  // Validate params etc
  JCommander jComm = new JCommander(this);
  jComm.setProgramName("Splout Page Counts example");
  try {
    jComm.parse(args);
  } catch (ParameterException e) {
    System.err.println(e.getMessage());
    jComm.usage();
    System.exit(-1);
  }

  boolean generate = !noGenerate; // just for clarity
  if (generateTupleFiles && deploy) {
    System.err.println("Can't run a 'dry' TupleFile generation and deploy it.");
    jComm.usage();
    System.exit(-1);
  }

  Path outPath = new Path(outputPath);
  FileSystem outFs = outPath.getFileSystem(getConf());

  if (!FileSystem.getLocal(conf).equals(FileSystem.get(conf))) {
    File nativeLibs = new File("native");
    if (nativeLibs.exists()) {
      SploutHadoopConfiguration.addSQLite4JavaNativeLibsToDC(conf);
    }
  }

  if (generate) {
    Path inputPath = new Path(this.inputPath);
    FileSystem inputFileSystem = inputPath.getFileSystem(conf);
    FileStatus[] fileStatuses = inputFileSystem.listStatus(inputPath);

    // define the schema that the resultant table will have: date, hour, pagename, pageviews
    final Schema tableSchema = new Schema("pagecounts",
        Fields.parse("date:string, hour:string, pagename:string, pageviews:int"));
    // define the schema of the input files: projectcode, pagename, pageviews, bytes
    Schema fileSchema = new Schema("pagecountsfile",
        Fields.parse("projectcode:string, pagename:string, pageviews:int, bytes:long"));

    // instantiate a TableBuilder
    TableBuilder tableBuilder = new TableBuilder(tableSchema);

    // for every input file...
    for (FileStatus fileStatus : fileStatuses) {
      String fileName = fileStatus.getPath().getName().toString();
      // strip the date and the hour from the file name
      String fileDate = fileName.split("-")[1];
      String fileHour = fileName.split("-")[2].substring(0, 2);
      // instantiate a custom RecordProcessor to process the records of this file
      PageCountsRecordProcessor recordProcessor =
          new PageCountsRecordProcessor(tableSchema, fileDate, fileHour);
      // use the tableBuilder method for adding each of the files to the mix
      tableBuilder.addCSVTextFile(fileStatus.getPath(), ' ',
          TupleTextInputFormat.NO_QUOTE_CHARACTER, TupleTextInputFormat.NO_ESCAPE_CHARACTER,
          false, false, TupleTextInputFormat.NO_NULL_STRING, fileSchema, recordProcessor);
    }

    // partition the dataset by pagename - which should give a fairly even distribution
tableBuilder.partitionBy("pagename"); // create a compound index on pagename, date so that typical queries for the dataset will be // fast tableBuilder.createIndex("pagename", "date"); long nonExactPageSize = memoryForIndexing / 32000; // number of pages int pageSize = (int) Math.pow(2, (int) Math.round(Math.log(nonExactPageSize) / Math.log(2))); Log.info( "Pagesize = " + pageSize + " as memory for indexing was [" + memoryForIndexing + "] and there are 32000 pages."); tableBuilder.initialSQL("pragma page_size=" + pageSize); // insertion order is very important for optimizing query speed because it makes data be // co-located in disk tableBuilder.insertionSortOrder(OrderBy.parse("pagename:asc, date:asc")); // instantiate a TablespaceBuilder TablespaceBuilder tablespaceBuilder = new TablespaceBuilder(); // we will partition this dataset in as many partitions as: tablespaceBuilder.setNPartitions(nPartitions); tablespaceBuilder.add(tableBuilder.build()); // we turn a specific SQLite pragma on for making autocomplete queries fast tablespaceBuilder.initStatements("pragma case_sensitive_like=true;"); HadoopUtils.deleteIfExists(outFs, outPath); // finally, instantiate a TablespaceGenerator and execute it TablespaceGenerator tablespaceViewBuilder; if (generateTupleFiles) { // we subclass TablespaceGenerator to be able to run the generation without outputting the // SQLite stores, for // benchmark comparisons. // In the future this feature may be useful in general for debugging store creation. tablespaceViewBuilder = new TablespaceGenerator(tablespaceBuilder.build(), outPath) { @Override public void generateView( Configuration conf, SamplingType samplingType, SamplingOptions samplingOptions) throws Exception { prepareOutput(conf); final int nPartitions = tablespace.getnPartitions(); if (nPartitions > 1) { partitionMap = sample(nPartitions, conf, samplingType, samplingOptions); } else { partitionMap = PartitionMap.oneShardOpenedMap(); } writeOutputMetadata(conf); TupleMRBuilder builder = createMRBuilder(nPartitions, conf); // Set a TupleOutput here instead of SQLiteOutput builder.setOutput( new Path(outputPath, OUT_STORE), new TupleOutputFormat(tableSchema), ITuple.class, NullWritable.class); Job job = builder.createJob(); executeViewGeneration(job); } }; } else { // ... otherwise a standard TablespaceGenerator is used. tablespaceViewBuilder = new TablespaceGenerator(tablespaceBuilder.build(), outPath); } tablespaceViewBuilder.generateView( getConf(), SamplingType.RESERVOIR, new TupleSampler.DefaultSamplingOptions()); } if (deploy) { // use StoreDeployerTool for deploying the already generated dataset StoreDeployerTool deployer = new StoreDeployerTool(qnode, getConf()); ArrayList<TablespaceDepSpec> deployments = new ArrayList<TablespaceDepSpec>(); deployments.add(new TablespaceDepSpec("pagecounts", outPath.toString(), repFactor, null)); deployer.deploy(deployments); } return 1; }